Skip to content

Commit

Permalink
HLSL-IR: implement insertBits full polyfill
Browse files Browse the repository at this point in the history
Implemented as a general polyfill, but only used by the HLSL backend
currently.

Tested CTS using dawn_node with `-use-ir` and the following now pass:
`webgpu:shader,execution,expression,call,builtin,insertBits:*`
`webgpu:shader,validation,expression,call,builtin,insertBits:*`

Bug: b/363199437
Bug: b/42251045
Change-Id: I985dbb178bd4778f2cd682c6226d9383416051db
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/206117
Reviewed-by: James Price <[email protected]>
Commit-Queue: Antonio Maiorano <[email protected]>
  • Loading branch information
amaiorano authored and Dawn LUCI CQ committed Sep 9, 2024
1 parent bc21c4d commit 8648fb1
Show file tree
Hide file tree
Showing 26 changed files with 1,227 additions and 230 deletions.
63 changes: 49 additions & 14 deletions src/tint/lang/core/ir/transform/builtin_polyfill.cc
Original file line number Diff line number Diff line change
Expand Up @@ -411,26 +411,26 @@ struct State {
} break;
case BuiltinPolyfillLevel::kFull: {
// Replace:
// result = extractBits(v, offset, count)
// result = extractBits(e, offset, count)
// With:
// let s = min(offset, 32u);
// let e = min(32u, (s + count));
// let shl = (32u - e);
// let shr = (shl + s);
// let shl_result = select(i32(), (v << shl), (shl < 32u));
// let t = min(32u, (s + count));
// let shl = (32u - t);
// let shr = (shl + s);
// let shl_result = select(i32(), (e << shl), (shl < 32u));
// result = select(((shl_result >> 31u) >> 1u), (shl_result >> shr), (shr < 32u));
// }
auto* v = call->Args()[0];
auto* result_ty = v->Type();
auto* e = call->Args()[0];
auto* result_ty = e->Type();
auto* uint_ty = ty.match_width(ty.u32(), result_ty);
auto V = [&](uint32_t u) { return b.MatchWidth(u32(u), result_ty); };
b.InsertBefore(call, [&] {
auto* s = b.Call<u32>(core::BuiltinFn::kMin, offset, 32_u);
auto* e = b.Call<u32>(core::BuiltinFn::kMin, 32_u, b.Add(ty.u32(), s, count));
auto* shl = b.Subtract<u32>(32_u, e);
auto* t = b.Call<u32>(core::BuiltinFn::kMin, 32_u, b.Add(ty.u32(), s, count));
auto* shl = b.Subtract<u32>(32_u, t);
auto* shr = b.Add<u32>(shl, s);
auto* f1 = b.Zero(result_ty);
auto* t1 = b.ShiftLeft(result_ty, v, b.Construct(uint_ty, shl));
auto* t1 = b.ShiftLeft(result_ty, e, b.Construct(uint_ty, shl));
auto* shl_result = b.Call(result_ty, core::BuiltinFn::kSelect, f1, t1,
b.LessThan<bool>(shl, 32_u));
auto* f2 =
Expand Down Expand Up @@ -592,19 +592,54 @@ struct State {
case BuiltinPolyfillLevel::kClampOrRangeCheck: {
b.InsertBefore(call, [&] {
// Replace:
// insertBits(e, offset, count)
// insertBits(e, newbits, offset, count)
// With:
// let o = min(offset, 32);
// let c = min(count, w - o);
// insertBits(e, o, c);
// insertBits(e, newbits, o, c);
auto* o = b.Call(ty.u32(), core::BuiltinFn::kMin, offset, 32_u);
auto* c = b.Call(ty.u32(), core::BuiltinFn::kMin, count,
b.Subtract(ty.u32(), 32_u, o));
call->SetOperand(ir::CoreBuiltinCall::kArgsOperandOffset + 2, o->Result(0));
call->SetOperand(ir::CoreBuiltinCall::kArgsOperandOffset + 3, c->Result(0));
});
break;
}
} break;
case BuiltinPolyfillLevel::kFull: {
// Replace:
// result = insertBits(e, newbits, offset, count)
// With:
// let oc = (offset + count);
// let mask = ((select(0u, (1u << offset), (offset < 32u)) - 1u)
// ^ (select(0u, (1u << oc), (oc < 32u)) - 1u));
// result = ((select(i32(), (newbits << offset), (offset < 32u)) & i32(mask))
// | (e & i32(~(mask))));
auto* e = call->Args()[0];
auto* newbits = call->Args()[1];
auto* result_ty = e->Type();
auto* uint_ty = ty.match_width(ty.u32(), result_ty);
b.InsertBefore(call, [&] {
auto* oc = b.Add<u32>(offset, count);
auto* t1 = b.ShiftLeft<u32>(1_u, offset);
auto* s1 = b.Call<u32>(core::BuiltinFn::kSelect, b.Zero<u32>(), t1,
b.LessThan<bool>(offset, 32_u));
auto* t2 = b.ShiftLeft<u32>(1_u, oc);
auto* s2 = b.Call<u32>(core::BuiltinFn::kSelect, b.Zero<u32>(), t2,
b.LessThan<bool>(oc, 32_u));
auto* mask_lhs = b.Subtract<u32>(s1, 1_u);
auto* mask_rhs = b.Subtract<u32>(s2, 1_u);
auto* mask = b.Xor<u32>(mask_lhs, mask_rhs);
auto* f3 = b.Zero(result_ty);
auto* t3 = b.ShiftLeft(result_ty, newbits, b.Construct(uint_ty, offset));
auto* s3 = b.Call(result_ty, core::BuiltinFn::kSelect, f3, t3,
b.LessThan<bool>(offset, 32_u));
auto* result_lhs = b.And(result_ty, s3, b.Construct(result_ty, mask));
auto* result_rhs =
b.And(result_ty, e, b.Construct(result_ty, b.Complement<u32>(mask)));
auto* result = b.Or(result_ty, result_lhs, result_rhs);
result->SetResults(Vector{call->DetachResult()});
});
call->Destroy();
} break;
default:
TINT_UNIMPLEMENTED() << "insertBits polyfill level";
}
Expand Down
186 changes: 186 additions & 0 deletions src/tint/lang/core/ir/transform/builtin_polyfill_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1730,6 +1730,192 @@ TEST_F(IR_BuiltinPolyfillTest, InsertBits_ClampArgs_Vec4I32) {
EXPECT_EQ(expect, str());
}

TEST_F(IR_BuiltinPolyfillTest, InsertBits_Full_U32) {
    // Full-level polyfill of insertBits on scalar u32 operands: the builtin call must be
    // replaced by shift/select/mask arithmetic, with the final `or` taking over `%result`.
    Build(core::BuiltinFn::kInsertBits, ty.u32(), Vector{ty.u32(), ty.u32(), ty.u32(), ty.u32()});

    // The module as built, still containing the insertBits call.
    const char* const before = R"(
%foo = func(%arg:u32, %arg_1:u32, %arg_2:u32, %arg_3:u32):u32 { # %arg_1: 'arg', %arg_2: 'arg', %arg_3: 'arg'
$B1: {
%result:u32 = insertBits %arg, %arg_1, %arg_2, %arg_3
ret %result
}
}
)";
    EXPECT_EQ(before, str());

    // Request the full polyfill level for insertBits and run the transform.
    BuiltinPolyfillConfig polyfill_config;
    polyfill_config.insert_bits = BuiltinPolyfillLevel::kFull;
    Run(BuiltinPolyfill, polyfill_config);

    // %15 is the insertion mask (xor of the two guarded `(1 << n) - 1` terms). %21 keeps the
    // shifted new bits under the mask, %24 keeps the original bits outside of it.
    const char* const after = R"(
%foo = func(%arg:u32, %arg_1:u32, %arg_2:u32, %arg_3:u32):u32 { # %arg_1: 'arg', %arg_2: 'arg', %arg_3: 'arg'
$B1: {
%6:u32 = add %arg_2, %arg_3
%7:u32 = shl 1u, %arg_2
%8:bool = lt %arg_2, 32u
%9:u32 = select 0u, %7, %8
%10:u32 = shl 1u, %6
%11:bool = lt %6, 32u
%12:u32 = select 0u, %10, %11
%13:u32 = sub %9, 1u
%14:u32 = sub %12, 1u
%15:u32 = xor %13, %14
%16:u32 = construct %arg_2
%17:u32 = shl %arg_1, %16
%18:bool = lt %arg_2, 32u
%19:u32 = select 0u, %17, %18
%20:u32 = construct %15
%21:u32 = and %19, %20
%22:u32 = complement %15
%23:u32 = construct %22
%24:u32 = and %arg, %23
%result:u32 = or %21, %24
ret %result
}
}
)";
    EXPECT_EQ(after, str());
}

TEST_F(IR_BuiltinPolyfillTest, InsertBits_Full_I32) {
    // Full-level polyfill of insertBits on scalar i32 operands. Offset and count stay u32, so
    // the expected IR builds the mask in u32 and converts it to i32 with `construct`.
    Build(core::BuiltinFn::kInsertBits, ty.i32(), Vector{ty.i32(), ty.i32(), ty.u32(), ty.u32()});

    // The module as built, still containing the insertBits call.
    const char* const before = R"(
%foo = func(%arg:i32, %arg_1:i32, %arg_2:u32, %arg_3:u32):i32 { # %arg_1: 'arg', %arg_2: 'arg', %arg_3: 'arg'
$B1: {
%result:i32 = insertBits %arg, %arg_1, %arg_2, %arg_3
ret %result
}
}
)";
    EXPECT_EQ(before, str());

    // Request the full polyfill level for insertBits and run the transform.
    BuiltinPolyfillConfig polyfill_config;
    polyfill_config.insert_bits = BuiltinPolyfillLevel::kFull;
    Run(BuiltinPolyfill, polyfill_config);

    // %15 is the u32 mask; %20 and %23 are its i32 conversions (plain and complemented) that
    // are and-ed with the shifted new bits and the original value respectively.
    const char* const after = R"(
%foo = func(%arg:i32, %arg_1:i32, %arg_2:u32, %arg_3:u32):i32 { # %arg_1: 'arg', %arg_2: 'arg', %arg_3: 'arg'
$B1: {
%6:u32 = add %arg_2, %arg_3
%7:u32 = shl 1u, %arg_2
%8:bool = lt %arg_2, 32u
%9:u32 = select 0u, %7, %8
%10:u32 = shl 1u, %6
%11:bool = lt %6, 32u
%12:u32 = select 0u, %10, %11
%13:u32 = sub %9, 1u
%14:u32 = sub %12, 1u
%15:u32 = xor %13, %14
%16:u32 = construct %arg_2
%17:i32 = shl %arg_1, %16
%18:bool = lt %arg_2, 32u
%19:i32 = select 0i, %17, %18
%20:i32 = construct %15
%21:i32 = and %19, %20
%22:u32 = complement %15
%23:i32 = construct %22
%24:i32 = and %arg, %23
%result:i32 = or %21, %24
ret %result
}
}
)";
    EXPECT_EQ(after, str());
}

TEST_F(IR_BuiltinPolyfillTest, InsertBits_Full_Vec2U32) {
    // Full-level polyfill of insertBits on vec2<u32> operands. Offset and count remain scalar
    // u32, so the expected IR computes a scalar mask and widens it to vec2<u32> via `construct`.
    Build(core::BuiltinFn::kInsertBits, ty.vec2<u32>(),
          Vector{ty.vec2<u32>(), ty.vec2<u32>(), ty.u32(), ty.u32()});

    // The module as built, still containing the insertBits call.
    const char* const before = R"(
%foo = func(%arg:vec2<u32>, %arg_1:vec2<u32>, %arg_2:u32, %arg_3:u32):vec2<u32> { # %arg_1: 'arg', %arg_2: 'arg', %arg_3: 'arg'
$B1: {
%result:vec2<u32> = insertBits %arg, %arg_1, %arg_2, %arg_3
ret %result
}
}
)";
    EXPECT_EQ(before, str());

    // Request the full polyfill level for insertBits and run the transform.
    BuiltinPolyfillConfig polyfill_config;
    polyfill_config.insert_bits = BuiltinPolyfillLevel::kFull;
    Run(BuiltinPolyfill, polyfill_config);

    // %16 is the offset widened to a vec2<u32> shift amount; %20 and %23 are the scalar mask
    // (%15) and its complement widened to vec2<u32>.
    const char* const after = R"(
%foo = func(%arg:vec2<u32>, %arg_1:vec2<u32>, %arg_2:u32, %arg_3:u32):vec2<u32> { # %arg_1: 'arg', %arg_2: 'arg', %arg_3: 'arg'
$B1: {
%6:u32 = add %arg_2, %arg_3
%7:u32 = shl 1u, %arg_2
%8:bool = lt %arg_2, 32u
%9:u32 = select 0u, %7, %8
%10:u32 = shl 1u, %6
%11:bool = lt %6, 32u
%12:u32 = select 0u, %10, %11
%13:u32 = sub %9, 1u
%14:u32 = sub %12, 1u
%15:u32 = xor %13, %14
%16:vec2<u32> = construct %arg_2
%17:vec2<u32> = shl %arg_1, %16
%18:bool = lt %arg_2, 32u
%19:vec2<u32> = select vec2<u32>(0u), %17, %18
%20:vec2<u32> = construct %15
%21:vec2<u32> = and %19, %20
%22:u32 = complement %15
%23:vec2<u32> = construct %22
%24:vec2<u32> = and %arg, %23
%result:vec2<u32> = or %21, %24
ret %result
}
}
)";
    EXPECT_EQ(after, str());
}

TEST_F(IR_BuiltinPolyfillTest, InsertBits_Full_Vec4I32) {
    // Full-level polyfill of insertBits on vec4<i32> operands. The expected IR uses a
    // vec4<u32> shift amount and converts the scalar u32 mask to vec4<i32> via `construct`.
    Build(core::BuiltinFn::kInsertBits, ty.vec4<i32>(),
          Vector{ty.vec4<i32>(), ty.vec4<i32>(), ty.u32(), ty.u32()});

    // The module as built, still containing the insertBits call.
    const char* const before = R"(
%foo = func(%arg:vec4<i32>, %arg_1:vec4<i32>, %arg_2:u32, %arg_3:u32):vec4<i32> { # %arg_1: 'arg', %arg_2: 'arg', %arg_3: 'arg'
$B1: {
%result:vec4<i32> = insertBits %arg, %arg_1, %arg_2, %arg_3
ret %result
}
}
)";
    EXPECT_EQ(before, str());

    // Request the full polyfill level for insertBits and run the transform.
    BuiltinPolyfillConfig polyfill_config;
    polyfill_config.insert_bits = BuiltinPolyfillLevel::kFull;
    Run(BuiltinPolyfill, polyfill_config);

    // %16 is the offset widened to vec4<u32> for the shift; %20 and %23 are the scalar u32
    // mask (%15) and its complement converted to vec4<i32>.
    const char* const after = R"(
%foo = func(%arg:vec4<i32>, %arg_1:vec4<i32>, %arg_2:u32, %arg_3:u32):vec4<i32> { # %arg_1: 'arg', %arg_2: 'arg', %arg_3: 'arg'
$B1: {
%6:u32 = add %arg_2, %arg_3
%7:u32 = shl 1u, %arg_2
%8:bool = lt %arg_2, 32u
%9:u32 = select 0u, %7, %8
%10:u32 = shl 1u, %6
%11:bool = lt %6, 32u
%12:u32 = select 0u, %10, %11
%13:u32 = sub %9, 1u
%14:u32 = sub %12, 1u
%15:u32 = xor %13, %14
%16:vec4<u32> = construct %arg_2
%17:vec4<i32> = shl %arg_1, %16
%18:bool = lt %arg_2, 32u
%19:vec4<i32> = select vec4<i32>(0i), %17, %18
%20:vec4<i32> = construct %15
%21:vec4<i32> = and %19, %20
%22:u32 = complement %15
%23:vec4<i32> = construct %22
%24:vec4<i32> = and %arg, %23
%result:vec4<i32> = or %21, %24
ret %result
}
}
)";
    EXPECT_EQ(after, str());
}

TEST_F(IR_BuiltinPolyfillTest, TextureSampleBaseClampToEdge_2d_f32_NoPolyfill) {
auto* texture_ty =
ty.Get<core::type::SampledTexture>(core::type::TextureDimension::k2d, ty.f32());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,54 @@
SKIP: FAILED
struct VertexOutput {
float4 pos;
uint2 prevent_dce;
};

..\..\src\tint\lang\core\ir\transform\builtin_polyfill.cc:555 internal compiler error: TINT_UNIMPLEMENTED insertBits polyfill level
********************************************************************
* The tint shader compiler has encountered an unexpected error. *
* *
* Please help us fix this issue by submitting a bug report at *
* crbug.com/tint with the source program that triggered the bug. *
********************************************************************
struct vertex_main_outputs {
nointerpolation uint2 VertexOutput_prevent_dce : TEXCOORD0;
float4 VertexOutput_pos : SV_Position;
};


RWByteAddressBuffer prevent_dce : register(u0);
uint2 insertBits_3c7ba5() {
uint2 arg_0 = (1u).xx;
uint2 arg_1 = (1u).xx;
uint arg_2 = 1u;
uint arg_3 = 1u;
uint2 v = arg_0;
uint2 v_1 = arg_1;
uint v_2 = arg_2;
uint v_3 = (v_2 + arg_3);
uint v_4 = (((v_2 < 32u)) ? ((1u << v_2)) : (0u));
uint v_5 = ((v_4 - 1u) ^ ((((v_3 < 32u)) ? ((1u << v_3)) : (0u)) - 1u));
uint2 v_6 = (((v_2 < 32u)) ? ((v_1 << uint2((v_2).xx))) : ((0u).xx));
uint2 v_7 = (v_6 & uint2((v_5).xx));
uint2 res = (v_7 | (v & uint2((~(v_5)).xx)));
return res;
}

void fragment_main() {
prevent_dce.Store2(0u, insertBits_3c7ba5());
}

[numthreads(1, 1, 1)]
void compute_main() {
prevent_dce.Store2(0u, insertBits_3c7ba5());
}

VertexOutput vertex_main_inner() {
VertexOutput tint_symbol = (VertexOutput)0;
tint_symbol.pos = (0.0f).xxxx;
tint_symbol.prevent_dce = insertBits_3c7ba5();
VertexOutput v_8 = tint_symbol;
return v_8;
}

vertex_main_outputs vertex_main() {
VertexOutput v_9 = vertex_main_inner();
VertexOutput v_10 = v_9;
VertexOutput v_11 = v_9;
vertex_main_outputs v_12 = {v_11.prevent_dce, v_10.pos};
return v_12;
}

tint executable returned error: exit status 0xc000001d
Loading

0 comments on commit 8648fb1

Please sign in to comment.