diff --git a/deps.json b/deps.json index eb92de3e4..ba55345b3 100644 --- a/deps.json +++ b/deps.json @@ -14,7 +14,7 @@ "subrepo" : "KhronosGroup/SPIRV-Headers", "branch" : "main", "subdir" : "third_party/SPIRV-Headers", - "commit" : "fc7d2462765183c784a0c46beb13eee9e506a067" + "commit" : "4183b260f4cccae52a89efdfcdd43c4897989f42" }, { "name" : "SPIRV-Tools", diff --git a/include/clspv/PushConstant.h b/include/clspv/PushConstant.h index b9c70ade0..0fec5c490 100644 --- a/include/clspv/PushConstant.h +++ b/include/clspv/PushConstant.h @@ -29,11 +29,13 @@ enum class PushConstant : int { ImageMetadata, ModuleConstantsPointer, PrintfBufferPointer, + NormalizedSamplerMask, }; enum class ImageMetadata : int { ChannelOrder, ChannelDataType, + NormalizedSamplerMask, }; // Returns the name of the push constant from its enum. diff --git a/lib/AutoPodArgsPass.cpp b/lib/AutoPodArgsPass.cpp index 266ed762e..92d32f20e 100644 --- a/lib/AutoPodArgsPass.cpp +++ b/lib/AutoPodArgsPass.cpp @@ -28,6 +28,7 @@ #include "Constants.h" #include "Layout.h" #include "PushConstant.h" +#include "SamplerUtils.h" #include "Types.h" using namespace llvm; @@ -83,6 +84,36 @@ bool FunctionContainsImageChannelGetter(Function *F) { } return false; } +bool FunctionContainsReadImage3DNonLiteralSampler(Function *F) { + std::set visited_fct; + SmallVector fcts_to_visit; + fcts_to_visit.push_back(F); + while (!fcts_to_visit.empty()) { + SmallVector next_fcts_to_visit; + for (auto *fct : fcts_to_visit) { + visited_fct.insert(fct); + for (auto &BB : *fct) { + for (auto &I : BB) { + if (auto call = dyn_cast(&I)) { + auto Name = call->getCalledFunction()->getName(); + if (Name.contains("read_image")) { + if (clspv::isReadImage3DWithNonLiteralSampler(call)) { + return true; + } + } else { + Function *f = call->getCalledFunction(); + if (visited_fct.count(f) == 0) { + next_fcts_to_visit.push_back(f); + } + } + } + } + } + } + fcts_to_visit = std::move(next_fcts_to_visit); + } + return false; +} } // namespace void clspv::AutoPodArgsPass::runOnFunction(Function &F) { @@ -118,6 +149,8 @@ void clspv::AutoPodArgsPass::runOnFunction(Function &F) { } } const bool contains_image_channel_getter = FunctionContainsImageChannelGetter(&F); + const bool contains_read_image_3d_non_literal_sampler = + FunctionContainsReadImage3DNonLiteralSampler(&F); // Per-kernel push constant interface requires: // 1. Clustered pod args. @@ -127,6 +160,7 @@ void clspv::AutoPodArgsPass::runOnFunction(Function &F) { // 5. If 16-bit types are used, 16-bit push constants are supported. // 6. If 8-bit types are used, 8-bit push constants are supported. // 7. Not to have a image channel getter function call. + // 8. Not to have a read_image of 3d image with a non-literal sampler. const auto pod_struct_ty = StructType::get(M.getContext(), pod_types); const bool contains_array = ContainsArrayType(pod_struct_ty); const bool support_16bit_pc = !ContainsSizedType(pod_struct_ty, 16) || @@ -144,7 +178,8 @@ void clspv::AutoPodArgsPass::runOnFunction(Function &F) { clspv::Option::ClusterPodKernelArgs() && support_16bit_pc && support_8bit_pc && fits_push_constant && !clspv::UsesGlobalPushConstants(M) && !contains_array && - !contains_image_channel_getter; + !contains_image_channel_getter && + !contains_read_image_3d_non_literal_sampler; // Global type-mangled push constants require: // 1. Clustered pod args. diff --git a/lib/BuiltinsEnum.h b/lib/BuiltinsEnum.h index 388f03b6d..2704b5d28 100644 --- a/lib/BuiltinsEnum.h +++ b/lib/BuiltinsEnum.h @@ -29,7 +29,7 @@ enum BuiltinType : unsigned int { kSpirvCopyMemory, kClspvSamplerVarLiteral, kClspvCompositeConstruct, - kClspvGetImageSizes, + kClspvGetNormalizedSamplerMask, kType_Clspv_End, kType_Async_Start, diff --git a/lib/BuiltinsMap.inc b/lib/BuiltinsMap.inc index 5459cf9da..cc6ca6291 100644 --- a/lib/BuiltinsMap.inc +++ b/lib/BuiltinsMap.inc @@ -1117,7 +1117,7 @@ static std::unordered_map + +using namespace llvm; + +#define DEBUG_TYPE "inlinefuncwithreadimage3dnonliteralsamplerpass" + +PreservedAnalyses clspv::InlineFuncWithReadImage3DNonLiteralSamplerPass::run( + Module &M, ModuleAnalysisManager &) { + PreservedAnalyses PA; + + // Loop through our inline pass until they stop changing thing. + bool changed = true; + while (changed) { + changed &= InlineFunctions(M); + } + + return PA; +} + +static bool FunctionShouldBeInlined(Function &F) { + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + // If we have a call instruction... + if (auto call = dyn_cast(&I)) { + // ...which is calling read_image with a 3d image and a non literal + // sampler + if (clspv::isReadImage3DWithNonLiteralSampler(call)) { + return true; + } + } + } + } + return false; +} + +static bool FunctionContainsReadImageWithSampler(Function &F) { + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + // If we have a call instruction... + if (auto call = dyn_cast(&I)) { + auto Name = call->getCalledFunction()->getName(); + if (Name.contains("read_image") && Name.contains("ocl_sampler")) { + return true; + } + } + } + } + return false; +} + +bool clspv::InlineFuncWithReadImage3DNonLiteralSamplerPass::InlineFunctions( + Module &M) { + bool Changed = false; + + UniqueVector WorkList; + std::set FunctionToInline; + for (Function &F : M) { + if (F.isDeclaration() || F.getCallingConv() == CallingConv::SPIR_KERNEL) { + continue; + } + if (FunctionShouldBeInlined(F)) { + FunctionToInline.insert(&F); + } + } + + if (FunctionToInline.empty()) { + return false; + } + + // If we detect a read image of a 3D image with a non literal sampler, we need + // to inline every function with read_image because they might be using a non + // literal sampler used to read a 3D image, thus also needing a rework. + for (Function &F : M) { + if (F.isDeclaration() || F.getCallingConv() == CallingConv::SPIR_KERNEL) { + continue; + } + if (FunctionContainsReadImageWithSampler(F)) { + FunctionToInline.insert(&F); + } + } + + for (Function &F : M) { + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + // If we have a call instruction... + if (auto call = dyn_cast(&I)) { + // ...which is calling a function to inline + if (FunctionToInline.count(call->getCalledFunction()) > 0) { + WorkList.insert(call); + } + } + } + } + } + + for (CallInst *Call : WorkList) { + InlineFunctionInfo IFI; + Changed |= InlineFunction(*Call, IFI, false, nullptr, false).isSuccess(); + } + + return Changed; +} diff --git a/lib/InlineFuncWithReadImage3DNonLiteralSampler.h b/lib/InlineFuncWithReadImage3DNonLiteralSampler.h new file mode 100644 index 000000000..5859cbfc7 --- /dev/null +++ b/lib/InlineFuncWithReadImage3DNonLiteralSampler.h @@ -0,0 +1,32 @@ +// Copyright 2023 The Clspv Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" + +#ifndef _CLSPV_LIB_INLINE_READ_IMAGE3D_NON_LITERAL_SAMPLER_H +#define _CLSPV_LIB_INLINE_READ_IMAGE3D_NON_LITERAL_SAMPLER_H + +namespace clspv { + +struct InlineFuncWithReadImage3DNonLiteralSamplerPass + : llvm::PassInfoMixin { + llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &); + + bool InlineFunctions(llvm::Module &M); +}; +} // namespace clspv + +#endif // _CLSPV_LIB_INLINE_READ_IMAGE3D_NON_LITERAL_SAMPLER_H diff --git a/lib/PassRegistry.def b/lib/PassRegistry.def index bc92a356e..19ee97d10 100644 --- a/lib/PassRegistry.def +++ b/lib/PassRegistry.def @@ -33,11 +33,12 @@ MODULE_PASS("inline-entry-points-pass", clspv::InlineEntryPointsPass) MODULE_PASS("inline-func-with-image-metadata-getter", clspv::InlineFuncWithImageMetadataGetterPass) MODULE_PASS("inline-func-with-pointer-cast-arg", clspv::InlineFuncWithPointerBitCastArgPass) MODULE_PASS("inline-func-with-pointer-function-arg", clspv::InlineFuncWithPointerToFunctionArgPass) +MODULE_PASS("inline-func-with-read-image3d-non-literal-sampler", clspv::InlineFuncWithReadImage3DNonLiteralSamplerPass) MODULE_PASS("inline-func-with-single-call-site", clspv::InlineFuncWithSingleCallSitePass) MODULE_PASS("kernel-argnames-to-metadata", clspv::KernelArgNamesToMetadataPass) MODULE_PASS("logical-pointer-to-int", clspv::LogicalPointerToIntPass) MODULE_PASS("long-vector-lowering", clspv::LongVectorLoweringPass) -MODULE_PASS("set-image-channel-metadata", clspv::SetImageChannelMetadataPass) +MODULE_PASS("set-image-metadata", clspv::SetImageMetadataPass) MODULE_PASS("lower-addrspacecast", clspv::LowerAddrSpaceCastPass) MODULE_PASS("lower-private-pointer-phi", clspv::LowerPrivatePointerPHIPass) MODULE_PASS("multi-version-ubo-functions", clspv::MultiVersionUBOFunctionsPass) diff --git a/lib/Passes.h b/lib/Passes.h index a9c2d8301..6b5372864 100644 --- a/lib/Passes.h +++ b/lib/Passes.h @@ -32,6 +32,7 @@ #include "InlineFuncWithImageMetadataGetterPass.h" #include "InlineFuncWithPointerBitCastArgPass.h" #include "InlineFuncWithPointerToFunctionArgPass.h" +#include "InlineFuncWithReadImage3DNonLiteralSampler.h" #include "InlineFuncWithSingleCallSitePass.h" #include "KernelArgNamesToMetadataPass.h" #include "LogicalPointerToIntPass.h" @@ -52,7 +53,7 @@ #include "RewritePackedStructs.h" #include "SPIRVProducerPass.h" #include "ScalarizePass.h" -#include "SetImageChannelMetadataPass.h" +#include "SetImageMetadataPass.h" #include "ShareModuleScopeVariables.h" #include "SignedCompareFixupPass.h" #include "SimplifyPointerBitcastPass.h" diff --git a/lib/PushConstant.cpp b/lib/PushConstant.cpp index 681de7f16..988b0a79d 100644 --- a/lib/PushConstant.cpp +++ b/lib/PushConstant.cpp @@ -55,6 +55,8 @@ const char *GetPushConstantName(PushConstant pc) { return "module_constants_pointer"; case PushConstant::PrintfBufferPointer: return "printf_buffer_pointer"; + case PushConstant::NormalizedSamplerMask: + return "normalized_sampler_mask"; } llvm_unreachable("Unknown PushConstant in GetPushConstantName"); return ""; @@ -83,6 +85,8 @@ Type *GetPushConstantType(Module &M, PushConstant pc) { return IntegerType::get(C, 64); case PushConstant::PrintfBufferPointer: return IntegerType::get(C, 64); + case PushConstant::NormalizedSamplerMask: + return IntegerType::get(C, 64); default: break; } diff --git a/lib/ReplaceOpenCLBuiltinPass.cpp b/lib/ReplaceOpenCLBuiltinPass.cpp index 7b7b76c9b..291de1aa7 100644 --- a/lib/ReplaceOpenCLBuiltinPass.cpp +++ b/lib/ReplaceOpenCLBuiltinPass.cpp @@ -3355,7 +3355,9 @@ bool ReplaceOpenCLBuiltinPass::replaceSampledReadImage(Function &F) { IsUnnormalizedStaticSampler()) { IRBuilder<> B(CI); // normalized coordinate - Coord = NormalizedCoordinate(M, B, Coord, Img, image_ty); + auto ImgDimFP = GetImageDimFP(M, B, Img, image_ty); + Coord = NormalizedCoordinate(M, B, Coord, ImgDimFP, + SamplerInitValue & CLK_FILTER_NEAREST); // copy the sampler but using normalized coordinate Sampler = CallInst::Create( SamplerFct, diff --git a/lib/SPIRVProducerPass.cpp b/lib/SPIRVProducerPass.cpp index a3da8f944..331733743 100644 --- a/lib/SPIRVProducerPass.cpp +++ b/lib/SPIRVProducerPass.cpp @@ -3667,35 +3667,25 @@ SPIRVProducerPassImpl::GenerateClspvInstruction(CallInst *Call, } break; } - case Builtins::kClspvGetImageSizes: { - addCapability(spv::CapabilityImageQuery); - Value *Image = Call->getArgOperand(0); - auto *ImageTy = InferType(Image, module->getContext(), &InferredTypeCache); - if (ImageDimensionality(ImageTy) != spv::Dim3D || - !IsSampledImageType(ImageTy)) { - llvm_unreachable("Unexpected Image in Builtins::kClspvGetImageSizes"); - } + case Builtins::kClspvGetNormalizedSamplerMask: { + auto GV = module->getGlobalVariable(clspv::PushConstantsVariableName()); + auto offset = getSPIRVValue(mdconst::extract( + Call->getMetadata(clspv::SamplerMaskPushConstantOffsetName()) + ->getOperand(0))); + auto i32 = IntegerType::get(module->getContext(), 32); SPIRVOperandVec Ops; - - Ops << getSPIRVType( - FixedVectorType::get(Type::getInt32Ty(module->getContext()), 3)) - << Image << getSPIRVInt32Constant(0); - RID = addSPIRVInst(spv::OpImageQuerySizeLod, Ops); - - Ops.clear(); - auto int4Ty = - FixedVectorType::get(Type::getInt32Ty(module->getContext()), 4); - Ops << getSPIRVType(int4Ty) << RID - << getSPIRVConstant(ConstantInt::get(int4Ty, (uint64_t)1)) << 0 << 1 - << 2 << 4; - RID = addSPIRVInst(spv::OpVectorShuffle, Ops); + Ops << getSPIRVPointerType( + PointerType::get(i32, GV->getType()->getPointerAddressSpace()), + i32) + << getSPIRVValue(GV) + << getSPIRVInt32Constant(GV->getValueType()->getStructNumElements() - 1) + << offset; + RID = addSPIRVInst(spv::OpAccessChain, Ops); Ops.clear(); - Ops << getSPIRVType( - FixedVectorType::get(Type::getFloatTy(module->getContext()), 4)) - << RID; - RID = addSPIRVInst(spv::OpConvertUToF, Ops); + Ops << getSPIRVType(i32) << RID; + RID = addSPIRVInst(spv::OpLoad, Ops); break; } default: @@ -7017,6 +7007,43 @@ void SPIRVProducerPassImpl::GenerateKernelReflection() { } } + auto *sampler_md = + F.getMetadata(clspv::PushConstantMetadataSamplerMaskName()); + if (sampler_md) { + auto GV = module->getGlobalVariable(clspv::PushConstantsVariableName()); + auto STy = cast(GV->getValueType()); + auto num_operands = sampler_md->getNumOperands(); + assert(num_operands % 2 == 0); + for (unsigned i = 0; i < num_operands; i += 2) { + auto ordinal = + mdconst::extract(sampler_md->getOperand(i + 0)) + ->getZExtValue(); + + // Ordinals could have changed because of pod arguments, remap it to the + // initial ordinal if needed. + auto find = ordinals_map.find(ordinal); + if (find != ordinals_map.end()) { + ordinal = find->second; + } + auto index = + mdconst::extract(sampler_md->getOperand(i + 1)) + ->getZExtValue(); + auto offset = GetExplicitLayoutStructMemberOffset( + STy, STy->getStructNumElements() - 1, DL) + + GetExplicitLayoutStructMemberOffset( + cast(STy->getStructElementType( + STy->getStructNumElements() - 1)), + index, DL); + Ops.clear(); + Ops << getSPIRVType(Type::getVoidTy(module->getContext())) << import_id + << reflection::ExtInstNormalizedSamplerMaskPushConstant + << kernel_decl << getSPIRVInt32Constant(ordinal) + << getSPIRVInt32Constant(offset) + << getSPIRVInt32Constant(sizeof(uint32_t)); + addSPIRVInst(spv::OpExtInst, Ops); + } + } + // Generate the reflection for the image channel getter function if it is // used in this kernel. auto *image_getter_md = F.getMetadata(clspv::PushConstantsMetadataImageChannelName()); diff --git a/lib/SamplerUtils.cpp b/lib/SamplerUtils.cpp index 2df7ca730..2962e3e75 100644 --- a/lib/SamplerUtils.cpp +++ b/lib/SamplerUtils.cpp @@ -13,19 +13,62 @@ // limitations under the License. #include "SamplerUtils.h" +#include "Builtins.h" +#include "BuiltinsEnum.h" +#include "Constants.h" +#include "Types.h" using namespace llvm; namespace clspv { -Value *NormalizedCoordinate(Module &M, IRBuilder<> &B, Value *Coord, Value *Img, - Type *ImgTy) { +Value *GetImageDimFP(Module &M, IRBuilder<> &B, Value *Img, Type *ImgTy) { + auto float4Ty = FixedVectorType::get(B.getFloatTy(), 4); auto getImageSizesFct = M.getOrInsertFunction( - "clspv.get_image_sizes", - FunctionType::get(Coord->getType(), {ImgTy}, false)); + "_Z13get_image_dim11ocl_image3d", + FunctionType::get(FixedVectorType::get(B.getInt32Ty(), 4), {ImgTy}, + false)); Value *ImgSizes = B.CreateCall(getImageSizesFct, {Img}); + return B.CreateSIToFP(ImgSizes, float4Ty); +} + +Value *NormalizedCoordinate(Module &M, IRBuilder<> &B, Value *Coord, + Value *ImgDimFP, bool FilteringNearest) { + if (FilteringNearest) { + auto float4Ty = FixedVectorType::get(B.getFloatTy(), 4); + auto getFloorFct = M.getOrInsertFunction( + "floor", FunctionType::get(float4Ty, {float4Ty}, false)); + Coord = B.CreateCall(getFloorFct, {Coord}); + Coord = B.CreateFAdd(Coord, ConstantFP::get(float4Ty, 0.5)); + } + + return B.CreateFDiv(Coord, ImgDimFP); +} - return B.CreateFDiv(Coord, ImgSizes); +bool isReadImage3DWithNonLiteralSampler(CallInst *call) { + auto FI = Builtins::Lookup(call->getCalledFunction()); + switch (FI.getType()) { + case Builtins::kReadImagef: + case Builtins::kReadImagei: + case Builtins::kReadImageui: { + if (FI.getParameter(1).isSampler()) { + Type *ImgTy = call->getOperand(0)->getType(); + auto sampler_call = dyn_cast(call->getOperand(1)); + bool literal_sampler = + sampler_call && + (sampler_call->getCalledFunction()->getName().contains( + TranslateSamplerInitializerFunction()) || + sampler_call->getCalledFunction()->getName().contains( + LiteralSamplerFunction())); + if (clspv::ImageDimensionality(ImgTy) == spv::Dim3D && !literal_sampler) { + return true; + } + } + } break; + default: + break; + } + return false; } } // namespace clspv diff --git a/lib/SamplerUtils.h b/lib/SamplerUtils.h index 82619814d..e20d195e8 100644 --- a/lib/SamplerUtils.h +++ b/lib/SamplerUtils.h @@ -20,9 +20,14 @@ namespace clspv { +llvm::Value *GetImageDimFP(llvm::Module &M, llvm::IRBuilder<> &B, + llvm::Value *Img, llvm::Type *ImgTy); + llvm::Value *NormalizedCoordinate(llvm::Module &M, llvm::IRBuilder<> &B, - llvm::Value *Coord, llvm::Value *Img, - llvm::Type *ImgTy); + llvm::Value *Coord, llvm::Value *ImgDimFP, + bool FilteringNearest); + +bool isReadImage3DWithNonLiteralSampler(llvm::CallInst *call); } // namespace clspv diff --git a/lib/SetImageChannelMetadataPass.cpp b/lib/SetImageChannelMetadataPass.cpp deleted file mode 100644 index ca1976680..000000000 --- a/lib/SetImageChannelMetadataPass.cpp +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright 2022 The Clspv Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" - -#include - -#include "Builtins.h" -#include "Constants.h" -#include "PushConstant.h" -#include "SetImageChannelMetadataPass.h" - -using namespace llvm; - -#define DEBUG_TYPE "setimagechannelmetadata" - -namespace { - -using ImageGetterMap = - std::map, SmallVector>; -using MetadataVector = SmallVector; - -unsigned getImageOrdinal(Value *Image) { - auto *img_call = dyn_cast(Image); - assert(img_call != nullptr); - assert(clspv::Builtins::Lookup(img_call->getCalledFunction()).getType() == - clspv::Builtins::kClspvResource); - return dyn_cast( - img_call->getOperand(clspv::ClspvOperand::kResourceArgIndex)) - ->getZExtValue(); -} - -void concatWithFunctionMetadata(Function *F, MetadataVector &MDs) { - auto fct_md = F->getMetadata(clspv::PushConstantsMetadataImageChannelName()); - if (fct_md != nullptr) { - for (unsigned i = 0; i < fct_md->getNumOperands(); i++) { - MDs.push_back(fct_md->getOperand(i)); - } - } -} - -unsigned setMetadata(Module &M, Function *F, ImageGetterMap &map) { - auto i32 = IntegerType::get(M.getContext(), 32); - - unsigned int count = 0; - for (const auto &elem : map) { - auto pc = elem.first.first; - auto ordinal = elem.first.second; - auto calls = elem.second; - - unsigned offset = count++; - - MetadataVector MDs = { - ConstantAsMetadata::get(ConstantInt::get(i32, ordinal)), - ConstantAsMetadata::get(ConstantInt::get(i32, offset)), - ConstantAsMetadata::get(ConstantInt::get(i32, pc))}; - concatWithFunctionMetadata(F, MDs); - // Set metadata for the function to be able to generate the appropriate - // reflection from it - F->setMetadata(clspv::PushConstantsMetadataImageChannelName(), - MDNode::get(M.getContext(), MDs)); - - auto call_md = - MDNode::get(M.getContext(), - {ConstantAsMetadata::get(ConstantInt::get(i32, offset))}); - for (auto call : calls) { - // Set metadata for the call to be able to generate the appropriate gep - // with the correct offset from it - call->setMetadata(clspv::ImageGetterPushConstantOffsetName(), call_md); - } - } - return count; -} - -void updatePushConstant(Module &M, unsigned max_elements) { - // Create and return the structure that will contains the needed values - std::vector orderTypes(max_elements, - IntegerType::get(M.getContext(), 32)); - StructType *Ty = StructType::get(M.getContext(), orderTypes); - - clspv::RedeclareGlobalPushConstants(M, Ty, - (int)clspv::PushConstant::ImageMetadata); -} -} // namespace - -PreservedAnalyses -clspv::SetImageChannelMetadataPass::run(Module &M, ModuleAnalysisManager &) { - PreservedAnalyses PA; - - unsigned max_elements = 0; - - // Go through function and instruction to look for image metadata getter - // function - for (Function &F : M) { - if (F.isDeclaration() || F.getCallingConv() != CallingConv::SPIR_KERNEL) { - continue; - } - ImageGetterMap Map; - for (BasicBlock &BB : F) { - for (Instruction &I : BB) { - if (auto call = dyn_cast(&I)) { - auto Name = call->getCalledFunction()->getName(); - if (Name.contains("get_image_channel_order")) { - unsigned ordinal = getImageOrdinal(call->getArgOperand(0)); - Map[std::make_pair((unsigned)clspv::ImageMetadata::ChannelOrder, - ordinal)] - .push_back(call); - } else if (Name.contains("get_image_channel_data_type")) { - unsigned ordinal = getImageOrdinal(call->getArgOperand(0)); - Map[std::make_pair((unsigned)clspv::ImageMetadata::ChannelDataType, - ordinal)] - .push_back(call); - } - } - } - } - max_elements = std::max(max_elements, setMetadata(M, &F, Map)); - } - - if (max_elements > 0) - updatePushConstant(M, max_elements); - - return PA; -} diff --git a/lib/SetImageMetadataPass.cpp b/lib/SetImageMetadataPass.cpp new file mode 100644 index 000000000..e7f8551bc --- /dev/null +++ b/lib/SetImageMetadataPass.cpp @@ -0,0 +1,220 @@ +// Copyright 2022 The Clspv Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" + +#include +#include + +#include "Builtins.h" +#include "Constants.h" +#include "PushConstant.h" +#include "SamplerUtils.h" +#include "SetImageMetadataPass.h" +#include "Types.h" +#include "clspv/Option.h" +#include "clspv/Sampler.h" + +using namespace llvm; + +#define DEBUG_TYPE "setimagemetadata" + +namespace { + +using ImageMdMap = std::map, std::set>; +using MetadataVector = SmallVector; + +unsigned getOrdinal(Value *Val) { + auto *call = dyn_cast(Val); + assert(call != nullptr); + assert(clspv::Builtins::Lookup(call->getCalledFunction()).getType() == + clspv::Builtins::kClspvResource); + return dyn_cast( + call->getOperand(clspv::ClspvOperand::kResourceArgIndex)) + ->getZExtValue(); +} + +void concatWithFunctionMetadata(Function *F, MetadataVector &MDs) { + auto fct_md = F->getMetadata(clspv::PushConstantsMetadataImageChannelName()); + if (fct_md != nullptr) { + for (unsigned i = 0; i < fct_md->getNumOperands(); i++) { + MDs.push_back(fct_md->getOperand(i)); + } + } +} + +void setImageSamplerMetadata(Module &M, Function *F, unsigned ordinal, + unsigned offset, unsigned pc, + std::set &samplers) { + DenseMap cache; + auto i32 = IntegerType::get(M.getContext(), 32); + MetadataVector MDs = {ConstantAsMetadata::get(ConstantInt::get(i32, ordinal)), + ConstantAsMetadata::get(ConstantInt::get(i32, offset))}; + concatWithFunctionMetadata(F, MDs); + // Set metadata for the function to be able to generate the appropriate + // reflection from it + F->setMetadata(clspv::PushConstantMetadataSamplerMaskName(), + MDNode::get(M.getContext(), MDs)); + + auto call_md = MDNode::get( + M.getContext(), {ConstantAsMetadata::get(ConstantInt::get(i32, offset))}); + for (auto sampler : samplers) { + for (auto *U : sampler->users()) { + assert(isa(U)); + auto call = cast(U); + IRBuilder<> B(call); + Value *Coord = call->getOperand(2); + Value *Img = call->getOperand(0); + Type *ImgTy = clspv::InferType(Img, M.getContext(), &cache); + Value *ImgDimFP = clspv::GetImageDimFP(M, B, Img, ImgTy); + Value *NormCoordNearest = + clspv::NormalizedCoordinate(M, B, Coord, ImgDimFP, true); + Value *NormCoordLinear = + clspv::NormalizedCoordinate(M, B, Coord, ImgDimFP, false); + + auto getSamplerNormFct = + M.getOrInsertFunction("clspv.get_normalized_sampler_mask", + FunctionType::get(B.getInt32Ty(), {}, false)); + auto SamplerNorm = B.CreateCall(getSamplerNormFct, {}); + SamplerNorm->setMetadata(clspv::SamplerMaskPushConstantOffsetName(), + call_md); + + auto FilterMask = + B.CreateAnd(SamplerNorm, B.getInt32(clspv::kSamplerFilterMask)); + auto FilterCond = + B.CreateICmpEQ(FilterMask, B.getInt32(clspv::CLK_FILTER_NEAREST)); + if (clspv::Option::SpvVersion() <= + clspv::Option::SPIRVVersion::SPIRV_1_3) { + FilterCond = B.CreateVectorSplat(4, FilterCond); + } + + auto NormCoord = + B.CreateSelect(FilterCond, NormCoordNearest, NormCoordLinear); + + auto NormMask = B.CreateAnd( + SamplerNorm, B.getInt32(clspv::kSamplerNormalizedCoordsMask)); + auto NormCond = B.CreateICmpEQ( + NormMask, B.getInt32(clspv::CLK_NORMALIZED_COORDS_TRUE)); + if (clspv::Option::SpvVersion() <= + clspv::Option::SPIRVVersion::SPIRV_1_3) { + NormCond = B.CreateVectorSplat(4, NormCond); + } + + auto Select = B.CreateSelect(NormCond, Coord, NormCoord); + + call->setOperand(2, Select); + } + } +} + +void setImageChannelMetadata(Module &M, Function *F, unsigned ordinal, + unsigned offset, unsigned pc, + std::set &calls) { + auto i32 = IntegerType::get(M.getContext(), 32); + MetadataVector MDs = {ConstantAsMetadata::get(ConstantInt::get(i32, ordinal)), + ConstantAsMetadata::get(ConstantInt::get(i32, offset)), + ConstantAsMetadata::get(ConstantInt::get(i32, pc))}; + concatWithFunctionMetadata(F, MDs); + // Set metadata for the function to be able to generate the appropriate + // reflection from it + F->setMetadata(clspv::PushConstantsMetadataImageChannelName(), + MDNode::get(M.getContext(), MDs)); + + auto call_md = MDNode::get( + M.getContext(), {ConstantAsMetadata::get(ConstantInt::get(i32, offset))}); + for (auto call : calls) { + assert(isa(call)); + // Set metadata for the call to be able to generate the appropriate gep + // with the correct offset from it + cast(call)->setMetadata( + clspv::ImageGetterPushConstantOffsetName(), call_md); + } +} + +unsigned setMetadata(Module &M, Function *F, ImageMdMap &map) { + + unsigned int count = 0; + for (const auto &elem : map) { + auto pc = elem.first.first; + auto ordinal = elem.first.second; + auto calls = elem.second; + + unsigned offset = count++; + if (pc == (unsigned)clspv::ImageMetadata::NormalizedSamplerMask) { + setImageSamplerMetadata(M, F, ordinal, offset, pc, calls); + } else { + setImageChannelMetadata(M, F, ordinal, offset, pc, calls); + } + } + return count; +} + +void updatePushConstant(Module &M, unsigned max_elements) { + // Create and return the structure that will contains the needed values + std::vector orderTypes(max_elements, + IntegerType::get(M.getContext(), 32)); + StructType *Ty = StructType::get(M.getContext(), orderTypes); + + clspv::RedeclareGlobalPushConstants(M, Ty, + (int)clspv::PushConstant::ImageMetadata); +} +} // namespace + +PreservedAnalyses clspv::SetImageMetadataPass::run(Module &M, + ModuleAnalysisManager &) { + PreservedAnalyses PA; + + unsigned max_elements = 0; + + // Go through function and instruction to look for image metadata getter + // function + for (Function &F : M) { + if (F.isDeclaration() || F.getCallingConv() != CallingConv::SPIR_KERNEL) { + continue; + } + ImageMdMap Map; + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + if (auto call = dyn_cast(&I)) { + auto Name = call->getCalledFunction()->getName(); + if (Name.contains("get_image_channel_order")) { + unsigned ordinal = getOrdinal(call->getArgOperand(0)); + Map[std::make_pair((unsigned)clspv::ImageMetadata::ChannelOrder, + ordinal)] + .insert(call); + } else if (Name.contains("get_image_channel_data_type")) { + unsigned ordinal = getOrdinal(call->getArgOperand(0)); + Map[std::make_pair((unsigned)clspv::ImageMetadata::ChannelDataType, + ordinal)] + .insert(call); + } else if (isReadImage3DWithNonLiteralSampler(call)) { + auto sampler = call->getArgOperand(1); + unsigned ordinal = getOrdinal(sampler); + Map[std::make_pair( + (unsigned)clspv::ImageMetadata::NormalizedSamplerMask, + ordinal)] + .insert(sampler); + } + } + } + } + max_elements = std::max(max_elements, setMetadata(M, &F, Map)); + } + + if (max_elements > 0) + updatePushConstant(M, max_elements); + + return PA; +} diff --git a/lib/SetImageChannelMetadataPass.h b/lib/SetImageMetadataPass.h similarity index 92% rename from lib/SetImageChannelMetadataPass.h rename to lib/SetImageMetadataPass.h index 3f22eadcd..1970bccce 100644 --- a/lib/SetImageChannelMetadataPass.h +++ b/lib/SetImageMetadataPass.h @@ -24,8 +24,7 @@ // This is done by the InlineFuncWithImageMetadataGetterPass namespace clspv { -struct SetImageChannelMetadataPass - : llvm::PassInfoMixin { +struct SetImageMetadataPass : llvm::PassInfoMixin { llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &); }; } // namespace clspv diff --git a/test/AutoPodArgs/contains_read_image3d.ll b/test/AutoPodArgs/contains_read_image3d.ll new file mode 100644 index 000000000..b0bbd28ba --- /dev/null +++ b/test/AutoPodArgs/contains_read_image3d.ll @@ -0,0 +1,90 @@ +; RUN: clspv-opt %s -o %t.ll --passes=auto-pod-args +; RUN: FileCheck %s < %t.ll + +; CHECK: define dso_local spir_kernel void @foo(target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0) %img, target("spirv.Sampler") %sampler, ptr addrspace(1) align 16 %out, <4 x i32> %coord) +; CHECK-SAME: !clspv.pod_args_impl [[MD:![^ ]+]] +; CHECK: [[MD]] = !{i32 3} + +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir-unknown-unknown" + +; Function Attrs: convergent norecurse nounwind +define dso_local spir_func <4 x float> @bar(target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0) %img, target("spirv.Sampler") %sampler, <4 x i32> %coord) #0 !kernel_arg_name !14 { +entry: + %img.addr = alloca target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0), align 4 + store target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0) zeroinitializer, ptr %img.addr, align 4 + %sampler.addr = alloca target("spirv.Sampler"), align 4 + store target("spirv.Sampler") zeroinitializer, ptr %sampler.addr, align 4 + %coord.addr = alloca <4 x i32>, align 16 + store <4 x i32> zeroinitializer, ptr %coord.addr, align 16 + store target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0) %img, ptr %img.addr, align 4 + store target("spirv.Sampler") %sampler, ptr %sampler.addr, align 4 + store <4 x i32> %coord, ptr %coord.addr, align 16 + %0 = load target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0), ptr %img.addr, align 4 + %1 = load target("spirv.Sampler"), ptr %sampler.addr, align 4 + %2 = load <4 x i32>, ptr %coord.addr, align 16 + %call = call spir_func <4 x float> @_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_i(target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0) %0, target("spirv.Sampler") %1, <4 x i32> %2) #3 + ret <4 x float> %call +} + +; Function Attrs: convergent nounwind willreturn memory(read) +declare !kernel_arg_name !17 spir_func <4 x float> @_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_i(target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0), target("spirv.Sampler"), <4 x i32>) #1 + +; Function Attrs: convergent norecurse nounwind +define dso_local spir_kernel void @foo(target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0) %img, target("spirv.Sampler") %sampler, ptr addrspace(1) align 16 %out, <4 x i32> %coord) #2 !kernel_arg_name !16 !kernel_arg_addr_space !17 !kernel_arg_access_qual !18 !kernel_arg_type !19 !kernel_arg_base_type !20 !kernel_arg_type_qual !21 { +entry: + %img.addr = alloca target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0), align 4 + store target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0) zeroinitializer, ptr %img.addr, align 4 + %sampler.addr = alloca target("spirv.Sampler"), align 4 + store target("spirv.Sampler") zeroinitializer, ptr %sampler.addr, align 4 + %out.addr = alloca ptr addrspace(1), align 4 + store ptr addrspace(1) null, ptr %out.addr, align 4 + %coord.addr = alloca <4 x i32>, align 16 + store <4 x i32> zeroinitializer, ptr %coord.addr, align 16 + store target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0) %img, ptr %img.addr, align 4 + store target("spirv.Sampler") %sampler, ptr %sampler.addr, align 4 + store ptr addrspace(1) %out, ptr %out.addr, align 4 + store <4 x i32> %coord, ptr %coord.addr, align 16 + %0 = load target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0), ptr %img.addr, align 4 + %1 = load target("spirv.Sampler"), ptr %sampler.addr, align 4 + %2 = load <4 x i32>, ptr %coord.addr, align 16 + %call = call spir_func <4 x float> @bar(target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0) %0, target("spirv.Sampler") %1, <4 x i32> %2) #4 + %3 = load ptr addrspace(1), ptr %out.addr, align 4 + store <4 x float> %call, ptr addrspace(1) %3, align 16 + ret void +} + +attributes #0 = { convergent norecurse nounwind "less-precise-fpmad"="true" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="0" "stackrealign" } +attributes #1 = { convergent nounwind willreturn memory(read) "less-precise-fpmad"="true" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="0" "stackrealign" } +attributes #2 = { convergent norecurse nounwind "less-precise-fpmad"="true" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="0" "stackrealign" "uniform-work-group-size"="true" } +attributes #3 = { convergent nobuiltin nounwind willreturn memory(read) "no-builtins" } +attributes #4 = { convergent nobuiltin nounwind "no-builtins" } + +!llvm.module.flags = !{!0, !1, !2} +!opencl.ocl.version = !{!3} +!opencl.spir.version = !{!3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3} +!llvm.ident = !{!4, !5, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !5, !5, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6} +!_Z28clspv.entry_point_attributes = !{!7, !8, !9, !10, !11, !12, !13} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"direct-access-external-data", i32 0} +!2 = !{i32 7, !"frame-pointer", i32 2} +!3 = !{i32 1, i32 2} +!4 = !{!"clang version 18.0.0 (git@github.com:rjodinchr/llvm-project.git 9dd7a0568c68e41f287de190ae62950d273405c8)"} +!5 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project 1e6fc9626c0f49ce952a67aef47e86253d13f74a)"} +!6 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project ab674234c440ed27302f58eeccc612c83b32c43f)"} +!7 = !{!"_Z4sqrtf", !" __attribute__((overloadable)) __attribute__((const))"} +!8 = !{!"_Z4sqrtDv2_f", !" __attribute__((overloadable)) __attribute__((const))"} +!9 = !{!"_Z4sqrtDv3_f", !" __attribute__((overloadable)) __attribute__((const))"} +!10 = !{!"_Z4sqrtDv4_f", !" __attribute__((overloadable)) __attribute__((const))"} +!11 = !{!"_Z4sqrtDv8_f", !" __attribute__((overloadable)) __attribute__((const))"} +!12 = !{!"_Z4sqrtDv16_f", !" __attribute__((overloadable)) __attribute__((const))"} +!13 = !{!"foo", !" kernel"} +!14 = !{!"img", !"sampler", !"coord"} +!15 = !{!"", !"", !""} +!16 = !{!"img", !"sampler", !"out", !"coord"} +!17 = !{i32 1, i32 0, i32 1, i32 0} +!18 = !{!"read_only", !"none", !"none", !"none"} +!19 = !{!"image3d_t", !"sampler_t", !"float4*", !"int4"} +!20 = !{!"image3d_t", !"sampler_t", !"float __attribute__((ext_vector_type(4)))*", !"int __attribute__((ext_vector_type(4)))"} +!21 = !{!"", !"", !"", !""} diff --git a/test/DirectResourceAccess/ro_image3_sampler_args.cl b/test/DirectResourceAccess/ro_image3_sampler_args.cl index ec1bf6580..7094fc09d 100644 --- a/test/DirectResourceAccess/ro_image3_sampler_args.cl +++ b/test/DirectResourceAccess/ro_image3_sampler_args.cl @@ -5,29 +5,31 @@ // Just for fun, swap arguments in the helpers. +const sampler_t s = CLK_NORMALIZED_COORDS_TRUE; + __attribute__((noinline)) -float4 core(read_only image3d_t im, float4 coord, sampler_t s) { +float4 core(read_only image3d_t im, float4 coord) { return read_imagef(im, s, coord); } __attribute__((noinline)) -void apple(read_only image3d_t im, sampler_t s, float4 coord, global float4 *A) { - *A = core(im, coord, s); } +void apple(read_only image3d_t im, float4 coord, global float4 *A) { + *A = core(im, coord); } -kernel void foo(float4 coord, sampler_t s, read_only image3d_t im, global float4* A) { - apple(im, s, 2 * coord, A); } -kernel void bar(float4 coord, sampler_t s, read_only image3d_t im, global float4* A) { - apple(im, s, 3 * coord, A); } +kernel void foo(float4 coord, read_only image3d_t im, global float4* A) { + apple(im, 2 * coord, A); } +kernel void bar(float4 coord, read_only image3d_t im, global float4* A) { + apple(im, 3 * coord, A); } // CHECK: OpEntryPoint GLCompute [[_55:%[0-9a-zA-Z_]+]] "foo" // CHECK: OpEntryPoint GLCompute [[_64:%[0-9a-zA-Z_]+]] "bar" -// CHECK: OpDecorate [[_32:%[0-9a-zA-Z_]+]] Binding 1 -// CHECK: OpDecorate [[_33:%[0-9a-zA-Z_]+]] Binding 2 +// CHECK: OpDecorate [[_32:%[0-9a-zA-Z_]+]] Binding 0 +// CHECK: OpDecorate [[_33:%[0-9a-zA-Z_]+]] Binding 1 // CHECK-DAG: [[_void:%[0-9a-zA-Z_]+]] = OpTypeVoid // CHECK-DAG: [[_32]] = OpVariable {{.*}} UniformConstant // CHECK-DAG: [[_33]] = OpVariable {{.*}} UniformConstant // CHECK: [[_55]] = OpFunction [[_void]] // CHECK: [[_64]] = OpFunction [[_void]] // CHECK: [[_36:%[0-9a-zA-Z_]+]] = OpFunction -// CHECK: [[_41:%[0-9a-zA-Z_]+]] = OpLoad {{.*}} [[_32]] -// CHECK: [[_42:%[0-9a-zA-Z_]+]] = OpLoad {{.*}} [[_33]] +// CHECK-DAG: [[_41:%[0-9a-zA-Z_]+]] = OpLoad {{.*}} [[_32]] +// CHECK-DAG: [[_42:%[0-9a-zA-Z_]+]] = OpLoad {{.*}} [[_33]] // CHECK: [[_45:%[0-9a-zA-Z_]+]] = OpFunction [[_void]] diff --git a/test/ImageBuiltins/get_image_channel_image1d_array_read_only.ll b/test/ImageBuiltins/get_image_channel_image1d_array_read_only.ll index 8e836b886..fae15186a 100644 --- a/test/ImageBuiltins/get_image_channel_image1d_array_read_only.ll +++ b/test/ImageBuiltins/get_image_channel_image1d_array_read_only.ll @@ -1,5 +1,5 @@ -; RUN: clspv-opt %s -o %t.ll --passes=set-image-channel-metadata +; RUN: clspv-opt %s -o %t.ll --passes=set-image-metadata ; RUN: FileCheck %s < %t.ll ; AUTO-GENERATED TEST FILE diff --git a/test/ImageBuiltins/get_image_channel_image1d_array_read_write.ll b/test/ImageBuiltins/get_image_channel_image1d_array_read_write.ll index 776b8b079..157cea884 100644 --- a/test/ImageBuiltins/get_image_channel_image1d_array_read_write.ll +++ b/test/ImageBuiltins/get_image_channel_image1d_array_read_write.ll @@ -1,5 +1,5 @@ -; RUN: clspv-opt %s -o %t.ll --passes=set-image-channel-metadata +; RUN: clspv-opt %s -o %t.ll --passes=set-image-metadata ; RUN: FileCheck %s < %t.ll ; AUTO-GENERATED TEST FILE diff --git a/test/ImageBuiltins/get_image_channel_image1d_array_write_only.ll b/test/ImageBuiltins/get_image_channel_image1d_array_write_only.ll index cd51c2e71..cb1d04690 100644 --- a/test/ImageBuiltins/get_image_channel_image1d_array_write_only.ll +++ b/test/ImageBuiltins/get_image_channel_image1d_array_write_only.ll @@ -1,5 +1,5 @@ -; RUN: clspv-opt %s -o %t.ll --passes=set-image-channel-metadata +; RUN: clspv-opt %s -o %t.ll --passes=set-image-metadata ; RUN: FileCheck %s < %t.ll ; AUTO-GENERATED TEST FILE diff --git a/test/ImageBuiltins/get_image_channel_image1d_buffer_read_only.ll b/test/ImageBuiltins/get_image_channel_image1d_buffer_read_only.ll index 9b94e7fec..ac48c90bd 100644 --- a/test/ImageBuiltins/get_image_channel_image1d_buffer_read_only.ll +++ b/test/ImageBuiltins/get_image_channel_image1d_buffer_read_only.ll @@ -1,5 +1,5 @@ -; RUN: clspv-opt %s -o %t.ll --passes=set-image-channel-metadata +; RUN: clspv-opt %s -o %t.ll --passes=set-image-metadata ; RUN: FileCheck %s < %t.ll ; AUTO-GENERATED TEST FILE diff --git a/test/ImageBuiltins/get_image_channel_image1d_buffer_read_write.ll b/test/ImageBuiltins/get_image_channel_image1d_buffer_read_write.ll index 894013c30..aaf26191e 100644 --- a/test/ImageBuiltins/get_image_channel_image1d_buffer_read_write.ll +++ b/test/ImageBuiltins/get_image_channel_image1d_buffer_read_write.ll @@ -1,5 +1,5 @@ -; RUN: clspv-opt %s -o %t.ll --passes=set-image-channel-metadata +; RUN: clspv-opt %s -o %t.ll --passes=set-image-metadata ; RUN: FileCheck %s < %t.ll ; AUTO-GENERATED TEST FILE diff --git a/test/ImageBuiltins/get_image_channel_image1d_buffer_write_only.ll b/test/ImageBuiltins/get_image_channel_image1d_buffer_write_only.ll index 8ad074d0e..6f606ecd0 100644 --- a/test/ImageBuiltins/get_image_channel_image1d_buffer_write_only.ll +++ b/test/ImageBuiltins/get_image_channel_image1d_buffer_write_only.ll @@ -1,5 +1,5 @@ -; RUN: clspv-opt %s -o %t.ll --passes=set-image-channel-metadata +; RUN: clspv-opt %s -o %t.ll --passes=set-image-metadata ; RUN: FileCheck %s < %t.ll ; AUTO-GENERATED TEST FILE diff --git a/test/ImageBuiltins/get_image_channel_image1d_read_only.ll b/test/ImageBuiltins/get_image_channel_image1d_read_only.ll index 6d318b2bf..ddef33c51 100644 --- a/test/ImageBuiltins/get_image_channel_image1d_read_only.ll +++ b/test/ImageBuiltins/get_image_channel_image1d_read_only.ll @@ -1,5 +1,5 @@ -; RUN: clspv-opt %s -o %t.ll --passes=set-image-channel-metadata +; RUN: clspv-opt %s -o %t.ll --passes=set-image-metadata ; RUN: FileCheck %s < %t.ll ; AUTO-GENERATED TEST FILE diff --git a/test/ImageBuiltins/get_image_channel_image1d_read_write.ll b/test/ImageBuiltins/get_image_channel_image1d_read_write.ll index 222a7fd8d..7ec4f4829 100644 --- a/test/ImageBuiltins/get_image_channel_image1d_read_write.ll +++ b/test/ImageBuiltins/get_image_channel_image1d_read_write.ll @@ -1,5 +1,5 @@ -; RUN: clspv-opt %s -o %t.ll --passes=set-image-channel-metadata +; RUN: clspv-opt %s -o %t.ll --passes=set-image-metadata ; RUN: FileCheck %s < %t.ll ; AUTO-GENERATED TEST FILE diff --git a/test/ImageBuiltins/get_image_channel_image1d_write_only.ll b/test/ImageBuiltins/get_image_channel_image1d_write_only.ll index a4e612ffe..f079b1913 100644 --- a/test/ImageBuiltins/get_image_channel_image1d_write_only.ll +++ b/test/ImageBuiltins/get_image_channel_image1d_write_only.ll @@ -1,5 +1,5 @@ -; RUN: clspv-opt %s -o %t.ll --passes=set-image-channel-metadata +; RUN: clspv-opt %s -o %t.ll --passes=set-image-metadata ; RUN: FileCheck %s < %t.ll ; AUTO-GENERATED TEST FILE diff --git a/test/ImageBuiltins/get_image_channel_image2d_array_read_only.ll b/test/ImageBuiltins/get_image_channel_image2d_array_read_only.ll index 8dd640c67..2a7b392b2 100644 --- a/test/ImageBuiltins/get_image_channel_image2d_array_read_only.ll +++ b/test/ImageBuiltins/get_image_channel_image2d_array_read_only.ll @@ -1,5 +1,5 @@ -; RUN: clspv-opt %s -o %t.ll --passes=set-image-channel-metadata +; RUN: clspv-opt %s -o %t.ll --passes=set-image-metadata ; RUN: FileCheck %s < %t.ll ; AUTO-GENERATED TEST FILE diff --git a/test/ImageBuiltins/get_image_channel_image2d_array_read_write.ll b/test/ImageBuiltins/get_image_channel_image2d_array_read_write.ll index 75da9eece..d2c8ad11f 100644 --- a/test/ImageBuiltins/get_image_channel_image2d_array_read_write.ll +++ b/test/ImageBuiltins/get_image_channel_image2d_array_read_write.ll @@ -1,5 +1,5 @@ -; RUN: clspv-opt %s -o %t.ll --passes=set-image-channel-metadata +; RUN: clspv-opt %s -o %t.ll --passes=set-image-metadata ; RUN: FileCheck %s < %t.ll ; AUTO-GENERATED TEST FILE diff --git a/test/ImageBuiltins/get_image_channel_image2d_array_write_only.ll b/test/ImageBuiltins/get_image_channel_image2d_array_write_only.ll index 94560f838..df128056a 100644 --- a/test/ImageBuiltins/get_image_channel_image2d_array_write_only.ll +++ b/test/ImageBuiltins/get_image_channel_image2d_array_write_only.ll @@ -1,5 +1,5 @@ -; RUN: clspv-opt %s -o %t.ll --passes=set-image-channel-metadata +; RUN: clspv-opt %s -o %t.ll --passes=set-image-metadata ; RUN: FileCheck %s < %t.ll ; AUTO-GENERATED TEST FILE diff --git a/test/ImageBuiltins/get_image_channel_image2d_read_only.ll b/test/ImageBuiltins/get_image_channel_image2d_read_only.ll index e5e2d3cb8..f5c215d93 100644 --- a/test/ImageBuiltins/get_image_channel_image2d_read_only.ll +++ b/test/ImageBuiltins/get_image_channel_image2d_read_only.ll @@ -1,5 +1,5 @@ -; RUN: clspv-opt %s -o %t.ll --passes=set-image-channel-metadata +; RUN: clspv-opt %s -o %t.ll --passes=set-image-metadata ; RUN: FileCheck %s < %t.ll ; AUTO-GENERATED TEST FILE diff --git a/test/ImageBuiltins/get_image_channel_image2d_read_write.ll b/test/ImageBuiltins/get_image_channel_image2d_read_write.ll index ca874aa94..ef1580fdf 100644 --- a/test/ImageBuiltins/get_image_channel_image2d_read_write.ll +++ b/test/ImageBuiltins/get_image_channel_image2d_read_write.ll @@ -1,5 +1,5 @@ -; RUN: clspv-opt %s -o %t.ll --passes=set-image-channel-metadata +; RUN: clspv-opt %s -o %t.ll --passes=set-image-metadata ; RUN: FileCheck %s < %t.ll ; AUTO-GENERATED TEST FILE diff --git a/test/ImageBuiltins/get_image_channel_image2d_write_only.ll b/test/ImageBuiltins/get_image_channel_image2d_write_only.ll index c274d647a..ddefd874b 100644 --- a/test/ImageBuiltins/get_image_channel_image2d_write_only.ll +++ b/test/ImageBuiltins/get_image_channel_image2d_write_only.ll @@ -1,5 +1,5 @@ -; RUN: clspv-opt %s -o %t.ll --passes=set-image-channel-metadata +; RUN: clspv-opt %s -o %t.ll --passes=set-image-metadata ; RUN: FileCheck %s < %t.ll ; AUTO-GENERATED TEST FILE diff --git a/test/ImageBuiltins/get_image_channel_image3d_read_only.ll b/test/ImageBuiltins/get_image_channel_image3d_read_only.ll index 4d72964c8..4aafd1989 100644 --- a/test/ImageBuiltins/get_image_channel_image3d_read_only.ll +++ b/test/ImageBuiltins/get_image_channel_image3d_read_only.ll @@ -1,5 +1,5 @@ -; RUN: clspv-opt %s -o %t.ll --passes=set-image-channel-metadata +; RUN: clspv-opt %s -o %t.ll --passes=set-image-metadata ; RUN: FileCheck %s < %t.ll ; AUTO-GENERATED TEST FILE diff --git a/test/ImageBuiltins/get_image_channel_image3d_read_write.ll b/test/ImageBuiltins/get_image_channel_image3d_read_write.ll index e5023f048..efe320adf 100644 --- a/test/ImageBuiltins/get_image_channel_image3d_read_write.ll +++ b/test/ImageBuiltins/get_image_channel_image3d_read_write.ll @@ -1,5 +1,5 @@ -; RUN: clspv-opt %s -o %t.ll --passes=set-image-channel-metadata +; RUN: clspv-opt %s -o %t.ll --passes=set-image-metadata ; RUN: FileCheck %s < %t.ll ; AUTO-GENERATED TEST FILE diff --git a/test/ImageBuiltins/get_image_channel_image3d_write_only.ll b/test/ImageBuiltins/get_image_channel_image3d_write_only.ll index fe4e9dbbe..f67331123 100644 --- a/test/ImageBuiltins/get_image_channel_image3d_write_only.ll +++ b/test/ImageBuiltins/get_image_channel_image3d_write_only.ll @@ -1,5 +1,5 @@ -; RUN: clspv-opt %s -o %t.ll --passes=set-image-channel-metadata +; RUN: clspv-opt %s -o %t.ll --passes=set-image-metadata ; RUN: FileCheck %s < %t.ll ; AUTO-GENERATED TEST FILE diff --git a/test/ImageBuiltins/get_image_channel_multiple_kernel.ll b/test/ImageBuiltins/get_image_channel_multiple_kernel.ll index b5e16ef0b..2d806e7d7 100644 --- a/test/ImageBuiltins/get_image_channel_multiple_kernel.ll +++ b/test/ImageBuiltins/get_image_channel_multiple_kernel.ll @@ -1,4 +1,4 @@ -; RUN: clspv-opt %s -o %t.ll --passes=set-image-channel-metadata +; RUN: clspv-opt %s -o %t.ll --passes=set-image-metadata ; RUN: FileCheck %s < %t.ll target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" diff --git a/test/ImageBuiltins/get_image_channel_test_gen.py b/test/ImageBuiltins/get_image_channel_test_gen.py index 00f5ee92c..b71ca84b0 100644 --- a/test/ImageBuiltins/get_image_channel_test_gen.py +++ b/test/ImageBuiltins/get_image_channel_test_gen.py @@ -63,7 +63,7 @@ """) TEMPLATE_LL=Template(""" -; RUN: clspv-opt %s -o %t.ll --passes=set-image-channel-metadata +; RUN: clspv-opt %s -o %t.ll --passes=set-image-metadata ; RUN: FileCheck %s < %t.ll ; AUTO-GENERATED TEST FILE diff --git a/test/ImageBuiltins/read_image3d_with_literal_unorm_sampler.cl b/test/ImageBuiltins/read_image3d_with_literal_unorm_sampler.cl index 1a8e51166..374e877e8 100644 --- a/test/ImageBuiltins/read_image3d_with_literal_unorm_sampler.cl +++ b/test/ImageBuiltins/read_image3d_with_literal_unorm_sampler.cl @@ -8,16 +8,17 @@ // CHECK-DAG: [[uint3:%[^ ]+]] = OpTypeVector [[uint]] 3 // CHECK-DAG: [[uint4:%[^ ]+]] = OpTypeVector [[uint]] 4 // CHECK-DAG: [[float4:%[^ ]+]] = OpTypeVector [[float]] 4 +// CHECK-DAG: [[float_0_5:%[^ ]+]] = OpConstant [[float]] 0.5 // CHECK-DAG: [[float_0:%[^ ]+]] = OpConstant [[float]] 0 // CHECK-DAG: [[uint_0:%[^ ]+]] = OpConstant [[uint]] 0 -// CHECK-DAG: [[uint_1:%[^ ]+]] = OpConstant [[uint]] 1 // CHECK-DAG: [[uint_21:%[^ ]+]] = OpConstant [[uint]] 21 -// CHECK-DAG: [[vec:%[^ ]+]] = OpConstantComposite [[uint4]] [[uint_1]] [[uint_1]] [[uint_1]] [[uint_1]] // CHECK: [[sizes:%[^ ]+]] = OpImageQuerySizeLod [[uint3]] {{.*}} [[uint_0]] -// CHECK: [[shuffle:%[^ ]+]] = OpVectorShuffle [[uint4]] [[sizes]] [[vec]] 0 1 2 4 -// CHECK: [[convert:%[^ ]+]] = OpConvertUToF [[float4]] [[shuffle]] -// CHECK: [[div:%[^ ]+]] = OpFDiv [[float4]] %44 [[convert]] +// CHECK: [[shuffle:%[^ ]+]] = OpCompositeConstruct [[uint4]] [[sizes]] [[uint_0]] +// CHECK: [[convert:%[^ ]+]] = OpConvertSToF [[float4]] [[shuffle]] +// CHECK: [[floor:%[^ ]+]] = OpExtInst [[float4]] {{.*}} Floor {{.*}} +// CHECK: [[add:%[^ ]+]] = OpFAdd [[float4]] [[floor]] {{.*}} +// CHECK: [[div:%[^ ]+]] = OpFDiv [[float4]] [[add]] [[convert]] // CHECK: OpImageSampleExplicitLod [[float4]] {{.*}} [[div]] Lod [[float_0]] // CHECK: OpExtInst %void {{.*}} LiteralSampler [[uint_0]] [[uint_0]] [[uint_21]] diff --git a/test/ImageBuiltins/read_image3d_with_literal_unorm_sampler.ll b/test/ImageBuiltins/read_image3d_with_literal_unorm_sampler.ll index 143f12d84..e334cf010 100644 --- a/test/ImageBuiltins/read_image3d_with_literal_unorm_sampler.ll +++ b/test/ImageBuiltins/read_image3d_with_literal_unorm_sampler.ll @@ -3,8 +3,11 @@ ; CHECK: [[sampler:%[^ ]+]] = call target("spirv.Sampler") @__translate_sampler_initializer(i32 21) ; CHECK: [[convert:%[^ ]+]] = sitofp <4 x i32> to <4 x float> -; CHECK: [[sizes:%[^ ]+]] = call <4 x float> @clspv.get_image_sizes(target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0) %img) -; CHECK: [[div:%[^ ]+]] = fdiv <4 x float> [[convert]], [[sizes]] +; CHECK: [[sizes:%[^ ]+]] = call <4 x i32> @_Z13get_image_dim11ocl_image3d(target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0) %img) +; CHECK: [[sizes_convert:%[^ ]+]] = sitofp <4 x i32> [[sizes]] to <4 x float> +; CHECK: [[floor:%[^ ]+]] = call <4 x float> @floor(<4 x float> [[convert]]) +; CHECK: [[add:%[^ ]+]] = fadd <4 x float> [[floor]], +; CHECK: [[div:%[^ ]+]] = fdiv <4 x float> [[add]], [[sizes_convert]] ; CHECK: call <4 x float> @_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_f(target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0) %img, target("spirv.Sampler") [[sampler]], <4 x float> [[div]]) diff --git a/test/ImageBuiltins/read_image3d_with_non_literal_sampler.cl b/test/ImageBuiltins/read_image3d_with_non_literal_sampler.cl new file mode 100644 index 000000000..fc72a1136 --- /dev/null +++ b/test/ImageBuiltins/read_image3d_with_non_literal_sampler.cl @@ -0,0 +1,45 @@ +// RUN: clspv %s -o %t.spv -cl-native-math +// RUN: spirv-dis %t.spv -o %t.spvasm +// RUN: FileCheck %s < %t.spvasm +// RUN: spirv-val %t.spv --target-env spv1.0 + +// CHECK-DAG: [[bool:%[^ ]+]] = OpTypeBool +// CHECK-DAG: [[uint:%[^ ]+]] = OpTypeInt 32 0 +// CHECK-DAG: [[float:%[^ ]+]] = OpTypeFloat 32 +// CHECK-DAG: [[bool4:%[^ ]+]] = OpTypeVector [[bool]] 4 +// CHECK-DAG: [[uint3:%[^ ]+]] = OpTypeVector [[uint]] 3 +// CHECK-DAG: [[uint4:%[^ ]+]] = OpTypeVector [[uint]] 4 +// CHECK-DAG: [[float4:%[^ ]+]] = OpTypeVector [[float]] 4 +// CHECK-DAG: [[uint_0:%[^ ]+]] = OpConstant [[uint]] 0 +// CHECK-DAG: [[uint_16:%[^ ]+]] = OpConstant [[uint]] 16 +// CHECK-DAG: [[uint_1:%[^ ]+]] = OpConstant [[uint]] 1 +// CHECK-DAG: [[uint_48:%[^ ]+]] = OpConstant [[uint]] 48 +// CHECK-DAG: [[uint_4:%[^ ]+]] = OpConstant [[uint]] 4 + +// CHECK: [[convert:%[^ ]+]] = OpConvertSToF [[float4]] {{.*}} +// CHECK: [[image_sizes:%[^ ]+]] = OpImageQuerySizeLod [[uint3]] {{.*}} [[uint_0]] +// CHECK: [[image_sizes4:%[^ ]+]] = OpCompositeConstruct [[uint4]] [[image_sizes]] [[uint_0]] +// CHECK: [[image_sizes_float:%[^ ]+]] = OpConvertSToF [[float4]] [[image_sizes4]] +// CHECK: [[floor:%[^ ]+]] = OpExtInst [[float4]] {{.*}} Floor [[convert]] +// CHECK: [[fadd:%[^ ]+]] = OpFAdd [[float4]] [[floor]] {{.*}} +// CHECK: [[fdix_nearest:%[^ ]+]] = OpFDiv [[float4]] [[fadd]] [[image_sizes_float]] +// CHECK: [[fdiv_linear:%[^ ]+]] = OpFDiv [[float4]] [[convert]] [[image_sizes_float]] +// CHECK: [[gep_sampler_mask:%[^ ]+]] = OpAccessChain +// CHECK: [[sampler_mask:%[^ ]+]] = OpLoad [[uint]] [[gep_sampler_mask]] +// CHECK: [[and:%[^ ]+]] = OpBitwiseAnd [[uint]] [[sampler_mask]] [[uint_48]] +// CHECK: [[cmp:%[^ ]+]] = OpIEqual [[bool]] [[and]] [[uint_16]] +// CHECK: [[insert:%[^ ]+]] = OpCompositeInsert [[bool4]] [[cmp]] {{.*}} 0 +// CHECK: [[shuffle:%[^ ]+]] = OpVectorShuffle [[bool4]] [[insert]] {{.*}} 0 0 0 0 +// CHECK: [[select:%[^ ]+]] = OpSelect [[float4]] [[shuffle]] [[fdix_nearest]] [[fdiv_linear]] +// CHECK: [[and:%[^ ]+]] = OpBitwiseAnd [[uint]] [[sampler_mask]] [[uint_1]] +// CHECK: [[cmp:%[^ ]+]] = OpIEqual [[bool]] [[and]] [[uint_1]] +// CHECK: [[insert:%[^ ]+]] = OpCompositeInsert [[bool4]] [[cmp]] {{.*}} 0 +// CHECK: [[shuffle:%[^ ]+]] = OpVectorShuffle [[bool4]] [[insert]] {{.*}} 0 0 0 0 +// CHECK: OpSelect [[float4]] [[shuffle]] [[convert]] [[select]] + +// CHECK: [[kernel:%[^ ]+]] = OpExtInst %void {{.*}} Kernel +// CHECK: OpExtInst %void {{.*}} NormalizedSamplerMaskPushConstant [[kernel]] [[uint_1]] [[uint_16]] [[uint_4]] + +kernel void foo(read_only image3d_t img, sampler_t sampler, global float4 *out, int4 coord) { + *out = read_imagef(img, sampler, coord); +} diff --git a/test/ImageBuiltins/read_image3d_with_non_literal_sampler.ll b/test/ImageBuiltins/read_image3d_with_non_literal_sampler.ll new file mode 100644 index 000000000..f910c2585 --- /dev/null +++ b/test/ImageBuiltins/read_image3d_with_non_literal_sampler.ll @@ -0,0 +1,104 @@ +; RUN: clspv-opt %s -o %t.ll --passes=set-image-metadata +; RUN: FileCheck %s < %t.ll + +; CHECK: [[coord:%[^ ]+]] = sitofp <4 x i32> {{.*}} to <4 x float> +; CHECK: [[image_dim:%[^ ]+]] = call <4 x i32> @_Z13get_image_dim11ocl_image3d(target("spirv.Image", float, 2, 0, 0, 0, 1, 0, 0, 0) %0) +; CHECK: [[convert:%[^ ]+]] = sitofp <4 x i32> [[image_dim]] to <4 x float> +; CHECK: [[floor:%[^ ]+]] = call <4 x float> @floor(<4 x float> [[coord]]) +; CHECK: [[fadd:%[^ ]+]] = fadd <4 x float> [[floor]], +; CHECK: [[fdiv_nearest:%[^ ]+]] = fdiv <4 x float> [[fadd]], [[convert]] +; CHECK: [[fdiv_linear:%[^ ]+]] = fdiv <4 x float> [[coord]], [[convert]] +; CHECK: [[sampler_mask:%[^ ]+]] = call i32 @clspv.get_normalized_sampler_mask(), !sampler_mask_push_constant_offset !29 +; CHECK: [[and:%[^ ]+]] = and i32 [[sampler_mask]], 48 +; CHECK: [[cmp:%[^ ]+]] = icmp eq i32 [[and]], 16 +; CHECK: [[insert:%[^ ]+]] = insertelement <4 x i1> poison, i1 [[cmp]], i64 0 +; CHECK: [[shuffle:%[^ ]+]] = shufflevector <4 x i1> [[insert]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK: [[select:%[^ ]+]] = select <4 x i1> [[shuffle]], <4 x float> [[fdiv_nearest]], <4 x float> [[fdiv_linear]] +; CHECK: [[and:%[^ ]+]] = and i32 [[sampler_mask]], 1 +; CHECK: [[cmp:%[^ ]+]] = icmp eq i32 [[and]], 1 +; CHECK: [[insert:%[^ ]+]] = insertelement <4 x i1> poison, i1 [[cmp]], i64 0 +; CHECK: [[shuffle:%[^ ]+]] = shufflevector <4 x i1> [[insert]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK: [[new_coord:%[^ ]+]] = select <4 x i1> [[shuffle]], <4 x float> [[coord]], <4 x float> [[select]] +; CHECK: tail call <4 x float> @_Z11read_imagef30ocl_image3d_ro_t.float.sampled11ocl_samplerDv4_f(target("spirv.Image", float, 2, 0, 0, 0, 1, 0, 0, 0) %0, target("spirv.Sampler") %1, <4 x float> [[new_coord]]) #2 + +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir-unknown-unknown" + +%0 = type { %1 } +%1 = type { i32, i32, i32, i32 } + +@__spirv_WorkgroupSize = local_unnamed_addr addrspace(8) global <3 x i32> zeroinitializer +@__push_constants = local_unnamed_addr addrspace(9) global %0 zeroinitializer, !push_constants !0 + +declare <4 x float> @_Z11read_imagef30ocl_image3d_ro_t.float.sampled11ocl_samplerDv4_f(target("spirv.Image", float, 2, 0, 0, 0, 1, 0, 0, 0), target("spirv.Sampler"), <4 x float>) + +; Function Attrs: norecurse nounwind +define spir_kernel void @foo(target("spirv.Image", float, 2, 0, 0, 0, 1, 0, 0, 0) %img, target("spirv.Sampler") %sampler, ptr addrspace(1) nocapture writeonly align 16 %out) #0 !kernel_arg_addr_space !16 !kernel_arg_access_qual !17 !kernel_arg_type !18 !kernel_arg_base_type !19 !kernel_arg_type_qual !20 !kernel_arg_name !21 !clspv.pod_args_impl !22 !kernel_arg_map !23 { +entry: + %0 = call target("spirv.Image", float, 2, 0, 0, 0, 1, 0, 0, 0) @_Z14clspv.resource.0(i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, target("spirv.Image", float, 2, 0, 0, 0, 1, 0, 0, 0) zeroinitializer) + %1 = call target("spirv.Sampler") @_Z14clspv.resource.1(i32 0, i32 1, i32 8, i32 1, i32 1, i32 0, target("spirv.Sampler") zeroinitializer) + %2 = call ptr addrspace(1) @_Z14clspv.resource.2(i32 0, i32 2, i32 0, i32 2, i32 2, i32 0, { [0 x <4 x float>] } zeroinitializer) + %3 = getelementptr { [0 x <4 x float>] }, ptr addrspace(1) %2, i32 0, i32 0, i32 0 + %4 = getelementptr %0, ptr addrspace(9) @__push_constants, i32 0, i32 0, i32 0 + %5 = load i32, ptr addrspace(9) %4, align 8 + %6 = getelementptr inbounds %0, ptr addrspace(9) @__push_constants, i32 0, i32 0, i32 1 + %7 = load i32, ptr addrspace(9) %6, align 4 + %8 = getelementptr inbounds %0, ptr addrspace(9) @__push_constants, i32 0, i32 0, i32 2 + %9 = load i32, ptr addrspace(9) %8, align 8 + %10 = getelementptr inbounds %0, ptr addrspace(9) @__push_constants, i32 0, i32 0, i32 3 + %11 = load i32, ptr addrspace(9) %10, align 4 + %12 = call <4 x i32> @_Z25clspv.composite_construct.0(i32 %5, i32 %7, i32 %9, i32 %11) + %13 = sitofp <4 x i32> %12 to <4 x float> + %14 = tail call <4 x float> @_Z11read_imagef30ocl_image3d_ro_t.float.sampled11ocl_samplerDv4_f(target("spirv.Image", float, 2, 0, 0, 0, 1, 0, 0, 0) %0, target("spirv.Sampler") %1, <4 x float> %13) #2 + store <4 x float> %14, ptr addrspace(1) %3, align 16 + ret void +} + +declare target("spirv.Image", float, 2, 0, 0, 0, 1, 0, 0, 0) @_Z14clspv.resource.0(i32, i32, i32, i32, i32, i32, target("spirv.Image", float, 2, 0, 0, 0, 1, 0, 0, 0)) + +declare target("spirv.Sampler") @_Z14clspv.resource.1(i32, i32, i32, i32, i32, i32, target("spirv.Sampler")) + +declare ptr addrspace(1) @_Z14clspv.resource.2(i32, i32, i32, i32, i32, i32, { [0 x <4 x float>] }) + +; Function Attrs: memory(read) +declare <4 x i32> @_Z25clspv.composite_construct.0(i32, i32, i32, i32) #1 + +attributes #0 = { norecurse nounwind "less-precise-fpmad"="true" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="0" "stackrealign" "uniform-work-group-size"="true" } +attributes #1 = { memory(read) } +attributes #2 = { nounwind } + +!llvm.module.flags = !{!1, !2, !3} +!opencl.ocl.version = !{!4} +!opencl.spir.version = !{!4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4} +!llvm.ident = !{!5, !6, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !6, !6, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7, !7} +!_Z28clspv.entry_point_attributes = !{!8, !9, !10, !11, !12, !13, !14} +!clspv.descriptor.index = !{!15} + +!0 = !{i32 7} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 7, !"direct-access-external-data", i32 0} +!3 = !{i32 7, !"frame-pointer", i32 2} +!4 = !{i32 1, i32 2} +!5 = !{!"clang version 18.0.0 (git@github.com:rjodinchr/llvm-project.git 9dd7a0568c68e41f287de190ae62950d273405c8)"} +!6 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project 1e6fc9626c0f49ce952a67aef47e86253d13f74a)"} +!7 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project ab674234c440ed27302f58eeccc612c83b32c43f)"} +!8 = !{!"_Z4sqrtf", !" __attribute__((overloadable)) __attribute__((const))"} +!9 = !{!"_Z4sqrtDv2_f", !" __attribute__((overloadable)) __attribute__((const))"} +!10 = !{!"_Z4sqrtDv3_f", !" __attribute__((overloadable)) __attribute__((const))"} +!11 = !{!"_Z4sqrtDv4_f", !" __attribute__((overloadable)) __attribute__((const))"} +!12 = !{!"_Z4sqrtDv8_f", !" __attribute__((overloadable)) __attribute__((const))"} +!13 = !{!"_Z4sqrtDv16_f", !" __attribute__((overloadable)) __attribute__((const))"} +!14 = !{!"foo", !" kernel"} +!15 = !{i32 1} +!16 = !{i32 1, i32 0, i32 1, i32 0} +!17 = !{!"read_only", !"none", !"none", !"none"} +!18 = !{!"image3d_t", !"sampler_t", !"float4*", !"int4"} +!19 = !{!"image3d_t", !"sampler_t", !"float __attribute__((ext_vector_type(4)))*", !"int __attribute__((ext_vector_type(4)))"} +!20 = !{!"", !"", !"", !""} +!21 = !{!"img", !"sampler", !"out", !"coord"} +!22 = !{i32 3} +!23 = !{!24, !25, !26, !27} +!24 = !{!"img", i32 0, i32 0, i32 0, i32 0, !"ro_image"} +!25 = !{!"sampler", i32 1, i32 1, i32 0, i32 0, !"sampler"} +!26 = !{!"out", i32 2, i32 2, i32 0, i32 0, !"buffer"} +!27 = !{!"coord", i32 3, i32 -1, i32 0, i32 16, !"pod_pushconstant"} diff --git a/test/ImageBuiltins/read_imagef_sampler_float4.cl b/test/ImageBuiltins/read_imagef_sampler_float4.cl index b0e530c0f..430334b59 100644 --- a/test/ImageBuiltins/read_imagef_sampler_float4.cl +++ b/test/ImageBuiltins/read_imagef_sampler_float4.cl @@ -1,20 +1,52 @@ -// RUN: clspv %target %s -o %t.spv +// RUN: clspv %target %s -o %t.spv --cl-native-math // RUN: spirv-dis -o %t2.spvasm %t.spv // RUN: FileCheck %s < %t2.spvasm // RUN: spirv-val --target-env vulkan1.0 %t.spv // CHECK-NOT: OpCapability StorageImageReadWithoutFormat +// CHECK-DAG: %[[BOOL_TYPE_ID:[a-zA-Z0-9_]*]] = OpTypeBool +// CHECK-DAG: %[[INT_TYPE_ID:[a-zA-Z0-9_]*]] = OpTypeInt 32 0 // CHECK-DAG: %[[FLOAT_TYPE_ID:[a-zA-Z0-9_]*]] = OpTypeFloat 32 // CHECK-DAG: %[[SAMPLER_TYPE_ID:[a-zA-Z0-9_]*]] = OpTypeSampler // CHECK-DAG: %[[READ_ONLY_IMAGE_TYPE_ID:[a-zA-Z0-9_]*]] = OpTypeImage %[[FLOAT_TYPE_ID]] 3D 0 0 0 1 Unknown // CHECK-DAG: %[[FLOAT4_TYPE_ID:[a-zA-Z0-9_]*]] = OpTypeVector %[[FLOAT_TYPE_ID]] 4 +// CHECK-DAG: %[[INT4_TYPE_ID:[a-zA-Z0-9_]*]] = OpTypeVector %[[INT_TYPE_ID]] 4 +// CHECK-DAG: %[[INT3_TYPE_ID:[a-zA-Z0-9_]*]] = OpTypeVector %[[INT_TYPE_ID]] 3 +// CHECK-DAG: %[[BOOL4_TYPE_ID:[a-zA-Z0-9_]*]] = OpTypeVector %[[BOOL_TYPE_ID]] 4 // CHECK-DAG: %[[SAMPLED_IMAGE_TYPE_ID:[a-zA-Z0-9_]*]] = OpTypeSampledImage %[[READ_ONLY_IMAGE_TYPE_ID]] +// CHECK-DAG: %[[FP_CONSTANT_0_5_ID:[a-zA-Z0-9_]*]] = OpConstant %[[FLOAT_TYPE_ID]] 0.5 // CHECK-DAG: %[[FP_CONSTANT_0_ID:[a-zA-Z0-9_]*]] = OpConstant %[[FLOAT_TYPE_ID]] 0 +// CHECK-DAG: %[[UINT_0:[a-zA-Z0-9_]*]] = OpConstant %[[INT_TYPE_ID]] 0 +// CHECK-DAG: %[[UINT_16:[a-zA-Z0-9_]*]] = OpConstant %[[INT_TYPE_ID]] 16 +// CHECK-DAG: %[[UINT_1:[a-zA-Z0-9_]*]] = OpConstant %[[INT_TYPE_ID]] 1 +// CHECK-DAG: %[[UINT_48:[a-zA-Z0-9_]*]] = OpConstant %[[INT_TYPE_ID]] 48 // CHECK: %[[S_LOAD_ID:[a-zA-Z0-9_]*]] = OpLoad %[[SAMPLER_TYPE_ID]] // CHECK: %[[I_LOAD_ID:[a-zA-Z0-9_]*]] = OpLoad %[[READ_ONLY_IMAGE_TYPE_ID]] -// CHECK: %[[C_LOAD_ID:[a-zA-Z0-9_]*]] = OpCompositeExtract %[[FLOAT4_TYPE_ID]] + +// CHECK: %[[COORD_ID:[a-zA-Z0-9_]*]] = OpCompositeConstruct %[[INT4_TYPE_ID]] +// CHECK: %[[BITCAST:[a-zA-Z0-9_]*]] = OpBitcast %[[FLOAT4_TYPE_ID]] %[[COORD_ID]] +// CHECK: %[[IMAGE_SIZES3:[a-zA-Z0-9_]*]] = OpImageQuerySizeLod %[[INT3_TYPE_ID]] %[[I_LOAD_ID]] +// CHECK: %[[IMAGE_SIZES4:[a-zA-Z0-9_]*]] = OpCompositeConstruct %[[INT4_TYPE_ID]] %[[IMAGE_SIZES3]] +// CHECK: %[[CONVERT:[a-zA-Z0-9_]*]] = OpConvertSToF %[[FLOAT4_TYPE_ID]] %[[IMAGE_SIZES4]] +// CHECK: %[[FLOOR:[a-zA-Z0-9_]*]] = OpExtInst %[[FLOAT4_TYPE_ID]] {{.*}} Floor %[[BITCAST]] +// CHECK: %[[FADD:[a-zA-Z0-9_]*]] = OpFAdd %[[FLOAT4_TYPE_ID]] %[[FLOOR]] {{.*}} +// CHECK: %[[FDIV_NEAREST:[a-zA-Z0-9_]*]] = OpFDiv %[[FLOAT4_TYPE_ID]] %[[FADD]] %[[CONVERT]] +// CHECK: %[[FDIV_LINEAR:[a-zA-Z0-9_]*]] = OpFDiv %[[FLOAT4_TYPE_ID]] %[[BITCAST]] %[[CONVERT]] +// CHECK: %[[GEP_SAMPLER_MASK:[a-zA-Z0-9_]*]] = OpAccessChain {{.*}} {{.*}} %[[UINT_1]] %[[UINT_0]] +// CHECK: %[[SAMPLER_MASK:[a-zA-Z0-9_]*]] = OpLoad %[[INT_TYPE_ID]] %[[GEP_SAMPLER_MASK]] +// CHECK: %[[AND:[a-zA-Z0-9_]*]] = OpBitwiseAnd %[[INT_TYPE_ID]] %[[SAMPLER_MASK]] %[[UINT_48]] +// CHECK: %[[CMP:[a-zA-Z0-9_]*]] = OpIEqual %[[BOOL_TYPE_ID]] %[[AND]] %[[UINT_16]] +// CHECK: %[[INSERT:[a-zA-Z0-9_]*]] = OpCompositeInsert %[[BOOL4_TYPE_ID]] %[[CMP]] {{.*}} 0 +// CHECK: %[[SHUFFLE:[a-zA-Z0-9_]*]] = OpVectorShuffle %[[BOOL4_TYPE_ID]] %[[INSERT]] {{.*}} 0 0 0 0 +// CHECK: %[[SELECT:[a-zA-Z0-9_]*]] = OpSelect %[[FLOAT4_TYPE_ID]] %[[SHUFFLE]] %[[FDIV_NEAREST]] %[[FDIV_LINEAR]] +// CHECK: %[[AND:[a-zA-Z0-9_]*]] = OpBitwiseAnd %[[INT_TYPE_ID]] %[[SAMPLER_MASK]] %[[UINT_1]] +// CHECK: %[[CMP:[a-zA-Z0-9_]*]] = OpIEqual %[[BOOL_TYPE_ID]] %[[AND]] %[[UINT_1]] +// CHECK: %[[INSERT:[a-zA-Z0-9_]*]] = OpCompositeInsert %[[BOOL4_TYPE_ID]] %[[CMP]] {{.*}} 0 +// CHECK: %[[SHUFFLE:[a-zA-Z0-9_]*]] = OpVectorShuffle %[[BOOL4_TYPE_ID]] %[[INSERT]] {{.*}} 0 0 0 0 +// CHECK: %[[OP_SELECT_ID:[a-zA-Z0-9_]*]] = OpSelect %[[FLOAT4_TYPE_ID]] %[[SHUFFLE]] %[[BITCAST]] %[[SELECT]] + // CHECK: %[[SAMPLED_IMAGE_ID:[a-zA-Z0-9_]*]] = OpSampledImage %[[SAMPLED_IMAGE_TYPE_ID]] %[[I_LOAD_ID]] %[[S_LOAD_ID]] -// CHECK: %[[OP_ID:[a-zA-Z0-9_]*]] = OpImageSampleExplicitLod %[[FLOAT4_TYPE_ID]] %[[SAMPLED_IMAGE_ID]] %[[C_LOAD_ID]] Lod %[[FP_CONSTANT_0_ID]] +// CHECK: %[[OP_ID:[a-zA-Z0-9_]*]] = OpImageSampleExplicitLod %[[FLOAT4_TYPE_ID]] %[[SAMPLED_IMAGE_ID]] %[[OP_SELECT_ID]] Lod %[[FP_CONSTANT_0_ID]] // CHECK: OpStore {{.*}} %[[OP_ID]] void kernel __attribute__((reqd_work_group_size(1, 1, 1))) foo(sampler_t s, read_only image3d_t i, float4 c, global float4* a) diff --git a/test/ImageBuiltins/read_imagei_sampler_float4.cl b/test/ImageBuiltins/read_imagei_sampler_float4.cl index 74c4ef8dc..345009057 100644 --- a/test/ImageBuiltins/read_imagei_sampler_float4.cl +++ b/test/ImageBuiltins/read_imagei_sampler_float4.cl @@ -22,13 +22,17 @@ void kernel __attribute__((reqd_work_group_size(1, 1, 1))) foo(sampler_t s, read // CHECK-DAG: [[_v4int:%[0-9a-zA-Z_]+]] = OpTypeVector [[_int]] 4 // CHECK-DAG: [[_4:%[0-9a-zA-Z_]+]] = OpTypeImage [[_int]] 3D 0 0 0 1 Unknown // CHECK-DAG: [[_v4float:%[0-9a-zA-Z_]+]] = OpTypeVector [[_float]] 4 +// CHECK-DAG: [[ptr_uniform_v4float:%[0-9a-zA-Z_]+]] = OpTypePointer Uniform [[_v4float]] // CHECK-DAG: [[_18:%[a-zA-Z0-9_]+]] = OpTypeSampledImage [[_4]] +// CHECK-DAG: [[_float_0_0625:%[0-9a-zA-Z_]+]] = OpConstant [[_float]] 0.0625 +// CHECK-DAG: [[_float_0_5:%[0-9a-zA-Z_]+]] = OpConstant [[_float]] 0.5 // CHECK-DAG: [[_float_0:%[0-9a-zA-Z_]+]] = OpConstant [[_float]] 0 // CHECK: [[_27:%[0-9a-zA-Z_]+]] = OpLoad [[_2]] // CHECK: [[_28:%[0-9a-zA-Z_]+]] = OpLoad [[_4]] -// CHECK: [[_30:%[0-9a-zA-Z_]+]] = OpLoad [[_v4float]] +// CHECK: [[gep:%[0-9a-zA-Z_]+]] = OpAccessChain [[ptr_uniform_v4float]] +// CHECK: [[_30:%[0-9a-zA-Z_]+]] = OpLoad [[_v4float]] [[gep]] // CHECK: [[_32:%[0-9a-zA-Z_]+]] = OpSampledImage [[_18]] [[_28]] [[_27]] -// CHECK: [[_33:%[0-9a-zA-Z_]+]] = OpImageSampleExplicitLod [[_v4int]] [[_32]] [[_30]] Lod [[_float_0]] +// CHECK: [[_33:%[0-9a-zA-Z_]+]] = OpImageSampleExplicitLod [[_v4int]] [[_32]] {{.*}} Lod [[_float_0]] // CHECK: [[cast:%[0-9a-zA-Z_]+]] = OpBitcast [[_v4uint]] [[_33]] // CHECK: OpStore {{.*}} [[cast]] @@ -37,20 +41,21 @@ void kernel __attribute__((reqd_work_group_size(1, 1, 1))) foo(sampler_t s, read // CLUSTER-DAG: [[_float:%[a-zA-Z0-9_]+]] = OpTypeFloat 32 // CLUSTER-DAG: [[_2:%[a-zA-Z0-9_]+]] = OpTypeSampler // CLUSTER-DAG: [[_uint:%[0-9a-zA-Z_]+]] = OpTypeInt 32 0 +// CLUSTER-DAG: [[ptr_pushconstant_uint:%[0-9a-zA-Z_]+]] = OpTypePointer PushConstant [[_uint]] // CLUSTER-DAG: [[_v4uint:%[0-9a-zA-Z_]+]] = OpTypeVector [[_uint]] 4 // CLUSTER-DAG: [[_int:%[0-9a-zA-Z_]+]] = OpTypeInt 32 1 // CLUSTER-DAG: [[_v4int:%[0-9a-zA-Z_]+]] = OpTypeVector [[_int]] 4 // CLUSTER-DAG: [[_4:%[0-9a-zA-Z_]+]] = OpTypeImage [[_int]] 3D 0 0 0 1 Unknown // CLUSTER-DAG: [[_v4float:%[a-zA-Z0-9_]+]] = OpTypeVector [[_float]] 4 -// CLUSTER-DAG: [[__struct_12:%[a-zA-Z0-9_]+]] = OpTypeStruct [[_v4float]] // CLUSTER-DAG: [[_19:%[a-zA-Z0-9_]+]] = OpTypeSampledImage [[_4]] +// CLUSTER-DAG: [[_float_0_0625:%[0-9a-zA-Z_]+]] = OpConstant [[_float]] 0.0625 +// CLUSTER-DAG: [[_float_0_5:%[0-9a-zA-Z_]+]] = OpConstant [[_float]] 0.5 // CLUSTER-DAG: [[_float_0:%[0-9a-zA-Z_]+]] = OpConstant [[_float]] 0 // CLUSTER: [[_28:%[a-zA-Z0-9_]+]] = OpLoad [[_2]] // CLUSTER: [[_29:%[a-zA-Z0-9_]+]] = OpLoad [[_4]] -// CLUSTER: [[_32:%[a-zA-Z0-9_]+]] = OpLoad [[__struct_12]] -// CLUSTER: [[_33:%[a-zA-Z0-9_]+]] = OpCompositeExtract [[_v4float]] [[_32]] 0 +// CLUSTER-COUNT-4: [[_32:%[a-zA-Z0-9_]+]] = OpAccessChain [[ptr_pushconstant_uint]] // CLUSTER: [[_34:%[a-zA-Z0-9_]+]] = OpSampledImage [[_19]] [[_29]] [[_28]] -// CLUSTER: [[_35:%[a-zA-Z0-9_]+]] = OpImageSampleExplicitLod [[_v4int]] [[_34]] [[_33]] Lod [[_float_0]] +// CLUSTER: [[_35:%[a-zA-Z0-9_]+]] = OpImageSampleExplicitLod [[_v4int]] [[_34]] {{.*}} Lod [[_float_0]] // CLUSTER: [[cast:%[a-zA-Z0-9_]+]] = OpBitcast [[_v4uint]] [[_35]] // CLUSTER: OpStore {{.*}} [[cast]] diff --git a/test/ImageBuiltins/read_imageui_sampler_float4.cl b/test/ImageBuiltins/read_imageui_sampler_float4.cl index 7aa86d9a6..5f1decf32 100644 --- a/test/ImageBuiltins/read_imageui_sampler_float4.cl +++ b/test/ImageBuiltins/read_imageui_sampler_float4.cl @@ -20,13 +20,17 @@ void kernel __attribute__((reqd_work_group_size(1, 1, 1))) foo(sampler_t s, read // CHECK-DAG: [[_v4uint:%[0-9a-zA-Z_]+]] = OpTypeVector [[_uint]] 4 // CHECK-DAG: [[_4:%[0-9a-zA-Z_]+]] = OpTypeImage [[_uint]] 3D 0 0 0 1 Unknown // CHECK-DAG: [[_v4float:%[0-9a-zA-Z_]+]] = OpTypeVector [[_float]] 4 +// CHECK-DAG: [[ptr_uniform_v4float:%[0-9a-zA-Z_]+]] = OpTypePointer Uniform [[_v4float]] // CHECK-DAG: [[_18:%[a-zA-Z0-9_]+]] = OpTypeSampledImage [[_4]] +// CHECK-DAG: [[_float_0_0625:%[0-9a-zA-Z_]+]] = OpConstant [[_float]] 0.0625 +// CHECK-DAG: [[_float_0_5:%[0-9a-zA-Z_]+]] = OpConstant [[_float]] 0.5 // CHECK-DAG: [[_float_0:%[0-9a-zA-Z_]+]] = OpConstant [[_float]] 0 // CHECK: [[_27:%[0-9a-zA-Z_]+]] = OpLoad [[_2]] // CHECK: [[_28:%[0-9a-zA-Z_]+]] = OpLoad [[_4]] -// CHECK: [[_30:%[0-9a-zA-Z_]+]] = OpLoad [[_v4float]] +// CHECK: [[gep:%[0-9a-zA-Z_]+]] = OpAccessChain [[ptr_uniform_v4float]] +// CHECK: [[_30:%[0-9a-zA-Z_]+]] = OpLoad [[_v4float]] [[gep]] // CHECK: [[_32:%[0-9a-zA-Z_]+]] = OpSampledImage [[_18]] [[_28]] [[_27]] -// CHECK: [[_33:%[0-9a-zA-Z_]+]] = OpImageSampleExplicitLod [[_v4uint]] [[_32]] [[_30]] Lod [[_float_0]] +// CHECK: [[_33:%[0-9a-zA-Z_]+]] = OpImageSampleExplicitLod [[_v4uint]] [[_32]] {{.*}} Lod [[_float_0]] // CHECK: OpStore {{.*}} [[_33]] // In a second round, check -cluster-pod-kernel-args @@ -34,18 +38,19 @@ void kernel __attribute__((reqd_work_group_size(1, 1, 1))) foo(sampler_t s, read // CLUSTER-DAG: [[_float:%[a-zA-Z0-9_]+]] = OpTypeFloat 32 // CLUSTER-DAG: [[_2:%[a-zA-Z0-9_]+]] = OpTypeSampler // CLUSTER-DAG: [[_uint:%[0-9a-zA-Z_]+]] = OpTypeInt 32 0 +// CLUSTER-DAG: [[ptr_pushconstant_uint:%[0-9a-zA-Z_]+]] = OpTypePointer PushConstant [[_uint]] // CLUSTER-DAG: [[_v4uint:%[0-9a-zA-Z_]+]] = OpTypeVector [[_uint]] 4 // CLUSTER-DAG: [[_4:%[0-9a-zA-Z_]+]] = OpTypeImage [[_uint]] 3D 0 0 0 1 Unknown // CLUSTER-DAG: [[_v4float:%[a-zA-Z0-9_]+]] = OpTypeVector [[_float]] 4 -// CLUSTER-DAG: [[__struct_12:%[a-zA-Z0-9_]+]] = OpTypeStruct [[_v4float]] // CLUSTER-DAG: [[_19:%[a-zA-Z0-9_]+]] = OpTypeSampledImage [[_4]] +// CLUSTER-DAG: [[_float_0_0625:%[0-9a-zA-Z_]+]] = OpConstant [[_float]] 0.0625 +// CLUSTER-DAG: [[_float_0_5:%[0-9a-zA-Z_]+]] = OpConstant [[_float]] 0.5 // CLUSTER-DAG: [[_float_0:%[0-9a-zA-Z_]+]] = OpConstant [[_float]] 0 // CLUSTER: [[_28:%[a-zA-Z0-9_]+]] = OpLoad [[_2]] // CLUSTER: [[_29:%[a-zA-Z0-9_]+]] = OpLoad [[_4]] -// CLUSTER: [[_32:%[a-zA-Z0-9_]+]] = OpLoad [[__struct_12]] -// CLUSTER: [[_33:%[a-zA-Z0-9_]+]] = OpCompositeExtract [[_v4float]] [[_32]] 0 +// CLUSTER-COUNT-4: [[_32:%[a-zA-Z0-9_]+]] = OpAccessChain [[ptr_pushconstant_uint]] // CLUSTER: [[_34:%[a-zA-Z0-9_]+]] = OpSampledImage [[_19]] [[_29]] [[_28]] -// CLUSTER: [[_35:%[a-zA-Z0-9_]+]] = OpImageSampleExplicitLod [[_v4uint]] [[_34]] [[_33]] Lod [[_float_0]] +// CLUSTER: [[_35:%[a-zA-Z0-9_]+]] = OpImageSampleExplicitLod [[_v4uint]] [[_34]] {{.*}} Lod [[_float_0]] // CLUSTER: OpStore {{.*}} [[_35]] diff --git a/test/ImageBuiltins/read_only_image3d_passed_to_other_function.cl b/test/ImageBuiltins/read_only_image3d_passed_to_other_function.cl index 4ad78faf2..eff4e8d37 100644 --- a/test/ImageBuiltins/read_only_image3d_passed_to_other_function.cl +++ b/test/ImageBuiltins/read_only_image3d_passed_to_other_function.cl @@ -3,15 +3,17 @@ // RUN: FileCheck %s < %t2.spvasm // RUN: spirv-val --target-env vulkan1.0 %t.spv +const sampler_t s = CLK_NORMALIZED_COORDS_TRUE; + __attribute__((noinline)) -float4 bar(sampler_t s, read_only image3d_t i, float4 c) +float4 bar(read_only image3d_t i, float4 c) { return read_imagef(i, s, c); } -void kernel __attribute__((reqd_work_group_size(1, 1, 1))) foo(sampler_t s, read_only image3d_t i, float4 c, global float4* a) +void kernel __attribute__((reqd_work_group_size(1, 1, 1))) foo(read_only image3d_t i, float4 c, global float4* a) { - *a = bar(s, i, c); + *a = bar(i, c); } // CHECK-DAG: [[_float:%[0-9a-zA-Z_]+]] = OpTypeFloat 32 // CHECK-DAG: [[_2:%[0-9a-zA-Z_]+]] = OpTypeSampler @@ -29,5 +31,6 @@ void kernel __attribute__((reqd_work_group_size(1, 1, 1))) foo(sampler_t s, read // CHECK: [[_29:%[0-9a-zA-Z_]+]] = OpLoad [[_4]] // CHECK: [[_30:%[0-9a-zA-Z_]+]] = OpLoad [[_2]] // CHECK: [[_31:%[0-9a-zA-Z_]+]] = OpSampledImage [[_17]] [[_29]] [[_30]] + // CHECK: [[_32:%[0-9a-zA-Z_]+]] = OpImageSampleExplicitLod [[_v4float]] [[_31]] [[_27]] Lod [[_float_0]] // CHECK: OpReturnValue [[_32]] diff --git a/test/ImageBuiltins/two_int_images.cl b/test/ImageBuiltins/two_int_images.cl index aa0998caf..47daebc6b 100644 --- a/test/ImageBuiltins/two_int_images.cl +++ b/test/ImageBuiltins/two_int_images.cl @@ -8,9 +8,10 @@ void kernel __attribute__((reqd_work_group_size(1, 1, 1))) foo(sampler_t s, read *a = read_imagei(i, s, c); } -void kernel __attribute__((reqd_work_group_size(1, 1, 1))) bar(sampler_t s, read_only image3d_t i, float4 c, global int4* a) +const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE; +void kernel __attribute__((reqd_work_group_size(1, 1, 1))) bar(read_only image3d_t i, float4 c, global int4* a) { - *a = read_imagei(i, s, c); + *a = read_imagei(i, sampler, c); } // CHECK-DAG: OpEntryPoint GLCompute [[foo:%[a-zA-Z0-9_]+]] "foo" @@ -37,9 +38,9 @@ void kernel __attribute__((reqd_work_group_size(1, 1, 1))) bar(sampler_t s, read // CHECK: [[cast:%[0-9a-zA-Z_]+]] = OpBitcast [[_v4uint]] [[_33]] // CHECK: OpStore {{.*}} [[cast]] // CHECK: [[bar]] = OpFunction -// CHECK: [[_27:%[0-9a-zA-Z_]+]] = OpLoad [[_2]] // CHECK: [[_28:%[0-9a-zA-Z_]+]] = OpLoad [[_3D]] // CHECK: [[_30:%[0-9a-zA-Z_]+]] = OpCompositeExtract [[_v4float]] +// CHECK: [[_27:%[0-9a-zA-Z_]+]] = OpLoad [[_2]] // CHECK: [[_32:%[0-9a-zA-Z_]+]] = OpSampledImage [[sampled3D]] [[_28]] [[_27]] // CHECK: [[_33:%[0-9a-zA-Z_]+]] = OpImageSampleExplicitLod [[_v4int]] [[_32]] [[_30]] Lod [[_float_0]] // CHECK: [[cast:%[0-9a-zA-Z_]+]] = OpBitcast [[_v4uint]] [[_33]] diff --git a/test/InlineFuncWithReadImage3DNonLiteralSampler/read_image3d.ll b/test/InlineFuncWithReadImage3DNonLiteralSampler/read_image3d.ll new file mode 100644 index 000000000..35cfd24bc --- /dev/null +++ b/test/InlineFuncWithReadImage3DNonLiteralSampler/read_image3d.ll @@ -0,0 +1,60 @@ +; RUN: clspv-opt %s -o %t.ll --passes=inline-func-with-read-image3d-non-literal-sampler +; RUN: FileCheck %s < %t.ll + +; CHECK-NOT: call spirv-func <4 x float> @bar + +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir-unknown-unknown" + +@__spirv_WorkgroupSize = addrspace(8) global <3 x i32> zeroinitializer + +; Function Attrs: convergent norecurse nounwind +define dso_local spir_func <4 x float> @bar(target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0) %img, target("spirv.Sampler") %sampler) #0 !kernel_arg_name !14 { +entry: + %0 = call <4 x float> @_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_f(target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0) %img, target("spirv.Sampler") %sampler, <4 x float> ) #2 + ret <4 x float> %0 +} + +; Function Attrs: convergent norecurse nounwind +define dso_local spir_kernel void @foo(target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0) %img, target("spirv.Sampler") %sampler, ptr addrspace(1) align 16 %out) #1 !kernel_arg_addr_space !15 !kernel_arg_access_qual !16 !kernel_arg_type !17 !kernel_arg_base_type !18 !kernel_arg_type_qual !19 !kernel_arg_name !20 !clspv.pod_args_impl !21 { +entry: + %call = call spir_func <4 x float> @bar(target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0) %img, target("spirv.Sampler") %sampler) #3 + store <4 x float> %call, ptr addrspace(1) %out, align 16 + ret void +} + +declare <4 x float> @_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_f(target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0), target("spirv.Sampler"), <4 x float>) + +attributes #0 = { convergent norecurse nounwind "less-precise-fpmad"="true" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="0" "stackrealign" } +attributes #1 = { convergent norecurse nounwind "less-precise-fpmad"="true" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="0" "stackrealign" "uniform-work-group-size"="true" } +attributes #2 = { nounwind } +attributes #3 = { convergent nobuiltin nounwind "no-builtins" } + +!llvm.module.flags = !{!0, !1, !2} +!opencl.ocl.version = !{!3} +!opencl.spir.version = !{!3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3} +!llvm.ident = !{!4, !5, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !5, !5, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6} +!_Z28clspv.entry_point_attributes = !{!7, !8, !9, !10, !11, !12, !13} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"direct-access-external-data", i32 0} +!2 = !{i32 7, !"frame-pointer", i32 2} +!3 = !{i32 1, i32 2} +!4 = !{!"clang version 18.0.0 (git@github.com:rjodinchr/llvm-project.git 9dd7a0568c68e41f287de190ae62950d273405c8)"} +!5 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project 1e6fc9626c0f49ce952a67aef47e86253d13f74a)"} +!6 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project ab674234c440ed27302f58eeccc612c83b32c43f)"} +!7 = !{!"_Z4sqrtf", !" __attribute__((overloadable)) __attribute__((const))"} +!8 = !{!"_Z4sqrtDv2_f", !" __attribute__((overloadable)) __attribute__((const))"} +!9 = !{!"_Z4sqrtDv3_f", !" __attribute__((overloadable)) __attribute__((const))"} +!10 = !{!"_Z4sqrtDv4_f", !" __attribute__((overloadable)) __attribute__((const))"} +!11 = !{!"_Z4sqrtDv8_f", !" __attribute__((overloadable)) __attribute__((const))"} +!12 = !{!"_Z4sqrtDv16_f", !" __attribute__((overloadable)) __attribute__((const))"} +!13 = !{!"foo", !" kernel"} +!14 = !{!"img", !"sampler"} +!15 = !{i32 1, i32 0, i32 1} +!16 = !{!"read_only", !"none", !"none"} +!17 = !{!"image3d_t", !"sampler_t", !"float4*"} +!18 = !{!"image3d_t", !"sampler_t", !"float __attribute__((ext_vector_type(4)))*"} +!19 = !{!"", !"", !""} +!20 = !{!"img", !"sampler", !"out"} +!21 = !{i32 3}