Skip to content

Commit

Permalink
Implement read of 3D images with unnormalised sampler (#1234)
Browse files Browse the repository at this point in the history
We need to know where the sampler is coming from, so we need to inline
every function containing a read_image of a 3D image with a non-literal
sampler.

This PR depends on KhronosGroup/SPIRV-Headers#377
We use the information from the sampler mask to know whether we need to
use the original coordinates or the ones we have normalised.

Like for channel_image_order and channel_image_data_type we use
metadata to pass information through the pipeline.
  • Loading branch information
rjodinchr authored Oct 12, 2023
1 parent 1218538 commit fe4555f
Show file tree
Hide file tree
Showing 52 changed files with 965 additions and 240 deletions.
2 changes: 1 addition & 1 deletion deps.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"subrepo" : "KhronosGroup/SPIRV-Headers",
"branch" : "main",
"subdir" : "third_party/SPIRV-Headers",
"commit" : "fc7d2462765183c784a0c46beb13eee9e506a067"
"commit" : "4183b260f4cccae52a89efdfcdd43c4897989f42"
},
{
"name" : "SPIRV-Tools",
Expand Down
2 changes: 2 additions & 0 deletions include/clspv/PushConstant.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,13 @@ enum class PushConstant : int {
ImageMetadata,
ModuleConstantsPointer,
PrintfBufferPointer,
NormalizedSamplerMask,
};

enum class ImageMetadata : int {
ChannelOrder,
ChannelDataType,
NormalizedSamplerMask,
};

// Returns the name of the push constant from its enum.
Expand Down
37 changes: 36 additions & 1 deletion lib/AutoPodArgsPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "Constants.h"
#include "Layout.h"
#include "PushConstant.h"
#include "SamplerUtils.h"
#include "Types.h"

using namespace llvm;
Expand Down Expand Up @@ -83,6 +84,36 @@ bool FunctionContainsImageChannelGetter(Function *F) {
}
return false;
}
bool FunctionContainsReadImage3DNonLiteralSampler(Function *F) {
std::set<Function *> visited_fct;
SmallVector<Function *, 1> fcts_to_visit;
fcts_to_visit.push_back(F);
while (!fcts_to_visit.empty()) {
SmallVector<Function *, 1> next_fcts_to_visit;
for (auto *fct : fcts_to_visit) {
visited_fct.insert(fct);
for (auto &BB : *fct) {
for (auto &I : BB) {
if (auto call = dyn_cast<CallInst>(&I)) {
auto Name = call->getCalledFunction()->getName();
if (Name.contains("read_image")) {
if (clspv::isReadImage3DWithNonLiteralSampler(call)) {
return true;
}
} else {
Function *f = call->getCalledFunction();
if (visited_fct.count(f) == 0) {
next_fcts_to_visit.push_back(f);
}
}
}
}
}
}
fcts_to_visit = std::move(next_fcts_to_visit);
}
return false;
}
} // namespace

void clspv::AutoPodArgsPass::runOnFunction(Function &F) {
Expand Down Expand Up @@ -118,6 +149,8 @@ void clspv::AutoPodArgsPass::runOnFunction(Function &F) {
}
}
const bool contains_image_channel_getter = FunctionContainsImageChannelGetter(&F);
const bool contains_read_image_3d_non_literal_sampler =
FunctionContainsReadImage3DNonLiteralSampler(&F);

// Per-kernel push constant interface requires:
// 1. Clustered pod args.
Expand All @@ -127,6 +160,7 @@ void clspv::AutoPodArgsPass::runOnFunction(Function &F) {
// 5. If 16-bit types are used, 16-bit push constants are supported.
// 6. If 8-bit types are used, 8-bit push constants are supported.
// 7. Not to have a image channel getter function call.
// 8. Not to have a read_image of 3d image with a non-literal sampler.
const auto pod_struct_ty = StructType::get(M.getContext(), pod_types);
const bool contains_array = ContainsArrayType(pod_struct_ty);
const bool support_16bit_pc = !ContainsSizedType(pod_struct_ty, 16) ||
Expand All @@ -144,7 +178,8 @@ void clspv::AutoPodArgsPass::runOnFunction(Function &F) {
clspv::Option::ClusterPodKernelArgs() && support_16bit_pc &&
support_8bit_pc && fits_push_constant &&
!clspv::UsesGlobalPushConstants(M) && !contains_array &&
!contains_image_channel_getter;
!contains_image_channel_getter &&
!contains_read_image_3d_non_literal_sampler;

// Global type-mangled push constants require:
// 1. Clustered pod args.
Expand Down
2 changes: 1 addition & 1 deletion lib/BuiltinsEnum.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ enum BuiltinType : unsigned int {
kSpirvCopyMemory,
kClspvSamplerVarLiteral,
kClspvCompositeConstruct,
kClspvGetImageSizes,
kClspvGetNormalizedSamplerMask,
kType_Clspv_End,

kType_Async_Start,
Expand Down
2 changes: 1 addition & 1 deletion lib/BuiltinsMap.inc
Original file line number Diff line number Diff line change
Expand Up @@ -1117,7 +1117,7 @@ static std::unordered_map<const char *, Builtins::BuiltinType, cstr_hash,
{"clspv.sampler_var_literal", Builtins::kClspvSamplerVarLiteral},
{"clspv.composite_construct", Builtins::kClspvCompositeConstruct},

{"clspv.get_image_sizes", Builtins::kClspvGetImageSizes},
{"clspv.get_normalized_sampler_mask", Builtins::kClspvGetNormalizedSamplerMask},
};

#endif // CLSPV_LIB_BUILTINSMAP_INC_
3 changes: 2 additions & 1 deletion lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,13 @@ add_library(clspv_passes OBJECT
${CMAKE_CURRENT_SOURCE_DIR}/InlineFuncWithImageMetadataGetterPass.cpp
${CMAKE_CURRENT_SOURCE_DIR}/InlineFuncWithPointerBitCastArgPass.cpp
${CMAKE_CURRENT_SOURCE_DIR}/InlineFuncWithPointerToFunctionArgPass.cpp
${CMAKE_CURRENT_SOURCE_DIR}/InlineFuncWithReadImage3DNonLiteralSampler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/InlineFuncWithSingleCallSitePass.cpp
${CMAKE_CURRENT_SOURCE_DIR}/KernelArgNamesToMetadataPass.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Layout.cpp
${CMAKE_CURRENT_SOURCE_DIR}/LogicalPointerToIntPass.cpp
${CMAKE_CURRENT_SOURCE_DIR}/LongVectorLoweringPass.cpp
${CMAKE_CURRENT_SOURCE_DIR}/SetImageChannelMetadataPass.cpp
${CMAKE_CURRENT_SOURCE_DIR}/SetImageMetadataPass.cpp
${CMAKE_CURRENT_SOURCE_DIR}/ThreeElementVectorLoweringPass.cpp
${CMAKE_CURRENT_SOURCE_DIR}/WrapKernelPass.cpp
${CMAKE_CURRENT_SOURCE_DIR}/LowerAddrSpaceCastPass.cpp
Expand Down
8 changes: 5 additions & 3 deletions lib/Compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,7 @@ int RunPassPipeline(llvm::Module &M, llvm::raw_svector_ostream *binaryStream) {
pm.addPass(clspv::InlineFuncWithPointerBitCastArgPass());
pm.addPass(clspv::InlineFuncWithPointerToFunctionArgPass());
pm.addPass(clspv::InlineFuncWithSingleCallSitePass());
pm.addPass(clspv::InlineFuncWithReadImage3DNonLiteralSamplerPass());

if (clspv::Option::HackLogicalPtrtoint()) {
pm.addPass(llvm::createModuleToFunctionPassAdaptor(llvm::PromotePass()));
Expand Down Expand Up @@ -734,9 +735,10 @@ int RunPassPipeline(llvm::Module &M, llvm::raw_svector_ostream *binaryStream) {
// DataLayout anymore so leave this right before SPIR-V generation.
pm.addPass(clspv::UBOTypeTransformPass());

// This pass depends on the inlining of the image metadata getter from
// InlineFuncWithImageMetadataGetterPass
pm.addPass(clspv::SetImageChannelMetadataPass());
// This pass depends on the inlining of the image metadata from
// InlineFuncWithImageMetadataGetterPass and
// InlineFuncWithReadImage3DNonLiteralSamplerPass
pm.addPass(clspv::SetImageMetadataPass());

// This is needed to remove long vectors created by SROA passes. Especially
// with vstore_half, which tends to always recreate long vectors after the
Expand Down
12 changes: 12 additions & 0 deletions lib/Constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,18 @@ inline std::string ImageGetterPushConstantOffsetName() {
return "image_getter_push_constant_offset";
}

// Name of the function level metadata storing association between argument
// ordinal and push constant offset for sampler mask.
inline std::string PushConstantMetadataSamplerMaskName() {
return "push_constants_sampler_mask";
}

// Name for the call level metadata storing the offset in the push constants
// variable.
inline std::string SamplerMaskPushConstantOffsetName() {
return "sampler_mask_push_constant_offset";
}

// Name for module level metadata storing next spec constant id.
inline std::string NextSpecConstantMetadataName() {
return "clspv.next_spec_constant_id";
Expand Down
124 changes: 124 additions & 0 deletions lib/InlineFuncWithReadImage3DNonLiteralSampler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
// Copyright 2023 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "llvm/ADT/UniqueVector.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Cloning.h"

#include "InlineFuncWithReadImage3DNonLiteralSampler.h"
#include "SamplerUtils.h"

#include <set>

using namespace llvm;

#define DEBUG_TYPE "inlinefuncwithreadimage3dnonliteralsamplerpass"

PreservedAnalyses clspv::InlineFuncWithReadImage3DNonLiteralSamplerPass::run(
Module &M, ModuleAnalysisManager &) {
PreservedAnalyses PA;

// Loop through our inline pass until they stop changing thing.
bool changed = true;
while (changed) {
changed &= InlineFunctions(M);
}

return PA;
}

static bool FunctionShouldBeInlined(Function &F) {
for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
// If we have a call instruction...
if (auto call = dyn_cast<CallInst>(&I)) {
// ...which is calling read_image with a 3d image and a non literal
// sampler
if (clspv::isReadImage3DWithNonLiteralSampler(call)) {
return true;
}
}
}
}
return false;
}

static bool FunctionContainsReadImageWithSampler(Function &F) {
for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
// If we have a call instruction...
if (auto call = dyn_cast<CallInst>(&I)) {
auto Name = call->getCalledFunction()->getName();
if (Name.contains("read_image") && Name.contains("ocl_sampler")) {
return true;
}
}
}
}
return false;
}

bool clspv::InlineFuncWithReadImage3DNonLiteralSamplerPass::InlineFunctions(
Module &M) {
bool Changed = false;

UniqueVector<CallInst *> WorkList;
std::set<Function *> FunctionToInline;
for (Function &F : M) {
if (F.isDeclaration() || F.getCallingConv() == CallingConv::SPIR_KERNEL) {
continue;
}
if (FunctionShouldBeInlined(F)) {
FunctionToInline.insert(&F);
}
}

if (FunctionToInline.empty()) {
return false;
}

// If we detect a read image of a 3D image with a non literal sampler, we need
// to inline every function with read_image because they might be using a non
// literal sampler used to read a 3D image, thus also needing a rework.
for (Function &F : M) {
if (F.isDeclaration() || F.getCallingConv() == CallingConv::SPIR_KERNEL) {
continue;
}
if (FunctionContainsReadImageWithSampler(F)) {
FunctionToInline.insert(&F);
}
}

for (Function &F : M) {
for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
// If we have a call instruction...
if (auto call = dyn_cast<CallInst>(&I)) {
// ...which is calling a function to inline
if (FunctionToInline.count(call->getCalledFunction()) > 0) {
WorkList.insert(call);
}
}
}
}
}

for (CallInst *Call : WorkList) {
InlineFunctionInfo IFI;
Changed |= InlineFunction(*Call, IFI, false, nullptr, false).isSuccess();
}

return Changed;
}
32 changes: 32 additions & 0 deletions lib/InlineFuncWithReadImage3DNonLiteralSampler.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright 2023 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"

#ifndef _CLSPV_LIB_INLINE_READ_IMAGE3D_NON_LITERAL_SAMPLER_H
#define _CLSPV_LIB_INLINE_READ_IMAGE3D_NON_LITERAL_SAMPLER_H

namespace clspv {

struct InlineFuncWithReadImage3DNonLiteralSamplerPass
: llvm::PassInfoMixin<InlineFuncWithReadImage3DNonLiteralSamplerPass> {
llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &);

bool InlineFunctions(llvm::Module &M);
};
} // namespace clspv

#endif // _CLSPV_LIB_INLINE_READ_IMAGE3D_NON_LITERAL_SAMPLER_H
3 changes: 2 additions & 1 deletion lib/PassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,12 @@ MODULE_PASS("inline-entry-points-pass", clspv::InlineEntryPointsPass)
MODULE_PASS("inline-func-with-image-metadata-getter", clspv::InlineFuncWithImageMetadataGetterPass)
MODULE_PASS("inline-func-with-pointer-cast-arg", clspv::InlineFuncWithPointerBitCastArgPass)
MODULE_PASS("inline-func-with-pointer-function-arg", clspv::InlineFuncWithPointerToFunctionArgPass)
MODULE_PASS("inline-func-with-read-image3d-non-literal-sampler", clspv::InlineFuncWithReadImage3DNonLiteralSamplerPass)
MODULE_PASS("inline-func-with-single-call-site", clspv::InlineFuncWithSingleCallSitePass)
MODULE_PASS("kernel-argnames-to-metadata", clspv::KernelArgNamesToMetadataPass)
MODULE_PASS("logical-pointer-to-int", clspv::LogicalPointerToIntPass)
MODULE_PASS("long-vector-lowering", clspv::LongVectorLoweringPass)
MODULE_PASS("set-image-channel-metadata", clspv::SetImageChannelMetadataPass)
MODULE_PASS("set-image-metadata", clspv::SetImageMetadataPass)
MODULE_PASS("lower-addrspacecast", clspv::LowerAddrSpaceCastPass)
MODULE_PASS("lower-private-pointer-phi", clspv::LowerPrivatePointerPHIPass)
MODULE_PASS("multi-version-ubo-functions", clspv::MultiVersionUBOFunctionsPass)
Expand Down
3 changes: 2 additions & 1 deletion lib/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "InlineFuncWithImageMetadataGetterPass.h"
#include "InlineFuncWithPointerBitCastArgPass.h"
#include "InlineFuncWithPointerToFunctionArgPass.h"
#include "InlineFuncWithReadImage3DNonLiteralSampler.h"
#include "InlineFuncWithSingleCallSitePass.h"
#include "KernelArgNamesToMetadataPass.h"
#include "LogicalPointerToIntPass.h"
Expand All @@ -52,7 +53,7 @@
#include "RewritePackedStructs.h"
#include "SPIRVProducerPass.h"
#include "ScalarizePass.h"
#include "SetImageChannelMetadataPass.h"
#include "SetImageMetadataPass.h"
#include "ShareModuleScopeVariables.h"
#include "SignedCompareFixupPass.h"
#include "SimplifyPointerBitcastPass.h"
Expand Down
4 changes: 4 additions & 0 deletions lib/PushConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ const char *GetPushConstantName(PushConstant pc) {
return "module_constants_pointer";
case PushConstant::PrintfBufferPointer:
return "printf_buffer_pointer";
case PushConstant::NormalizedSamplerMask:
return "normalized_sampler_mask";
}
llvm_unreachable("Unknown PushConstant in GetPushConstantName");
return "";
Expand Down Expand Up @@ -83,6 +85,8 @@ Type *GetPushConstantType(Module &M, PushConstant pc) {
return IntegerType::get(C, 64);
case PushConstant::PrintfBufferPointer:
return IntegerType::get(C, 64);
case PushConstant::NormalizedSamplerMask:
return IntegerType::get(C, 64);
default:
break;
}
Expand Down
4 changes: 3 additions & 1 deletion lib/ReplaceOpenCLBuiltinPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3355,7 +3355,9 @@ bool ReplaceOpenCLBuiltinPass::replaceSampledReadImage(Function &F) {
IsUnnormalizedStaticSampler()) {
IRBuilder<> B(CI);
// normalized coordinate
Coord = NormalizedCoordinate(M, B, Coord, Img, image_ty);
auto ImgDimFP = GetImageDimFP(M, B, Img, image_ty);
Coord = NormalizedCoordinate(M, B, Coord, ImgDimFP,
SamplerInitValue & CLK_FILTER_NEAREST);
// copy the sampler but using normalized coordinate
Sampler = CallInst::Create(
SamplerFct,
Expand Down
Loading

0 comments on commit fe4555f

Please sign in to comment.