pan/bi: Use FCLAMP pseudo op for clamp prop
Map nir_op_fsat/etc to FCLAMP pseudo ops, instead of FADD. There are significantly fewer knobs on FCLAMP, meaning significantly fewer things to get wrong. This fixes two(!) classes of bugs:

* Swizzles (failing to lower/compose swizzles on clamps)
* Numerical bugs (incorrectly treating +0.0 as an additive identity)

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12205>
This commit is contained in:
@@ -8313,4 +8313,24 @@
|
|||||||
</mod>
|
</mod>
|
||||||
</ins>
|
</ins>
|
||||||
|
|
||||||
|
<ins name="*FCLAMP.f32" pseudo="true">
|
||||||
|
<src start="0" mask="0xfb"/>
|
||||||
|
<mod name="clamp" start="15" size="2">
|
||||||
|
<opt>none</opt>
|
||||||
|
<opt>clamp_0_inf</opt>
|
||||||
|
<opt>clamp_m1_1</opt>
|
||||||
|
<opt>clamp_0_1</opt>
|
||||||
|
</mod>
|
||||||
|
</ins>
|
||||||
|
|
||||||
|
<ins name="*FCLAMP.v2f16" pseudo="true">
|
||||||
|
<src start="0" mask="0xfb"/>
|
||||||
|
<mod name="clamp" start="15" size="2">
|
||||||
|
<opt>none</opt>
|
||||||
|
<opt>clamp_0_inf</opt>
|
||||||
|
<opt>clamp_m1_1</opt>
|
||||||
|
<opt>clamp_0_1</opt>
|
||||||
|
</mod>
|
||||||
|
</ins>
|
||||||
|
|
||||||
</bifrost>
|
</bifrost>
|
||||||
|
@@ -33,6 +33,10 @@
|
|||||||
static void
|
static void
|
||||||
bi_lower_swizzle_16(bi_context *ctx, bi_instr *ins, unsigned src)
|
bi_lower_swizzle_16(bi_context *ctx, bi_instr *ins, unsigned src)
|
||||||
{
|
{
|
||||||
|
/* Identity is ok */
|
||||||
|
if (ins->src[src].swizzle == BI_SWIZZLE_H01)
|
||||||
|
return;
|
||||||
|
|
||||||
/* TODO: Use the opcode table and be a lot more methodical about this... */
|
/* TODO: Use the opcode table and be a lot more methodical about this... */
|
||||||
switch (ins->op) {
|
switch (ins->op) {
|
||||||
/* Some instructions used with 16-bit data never have swizzles */
|
/* Some instructions used with 16-bit data never have swizzles */
|
||||||
@@ -66,14 +70,24 @@ bi_lower_swizzle_16(bi_context *ctx, bi_instr *ins, unsigned src)
|
|||||||
return;
|
return;
|
||||||
else
|
else
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
/* We don't want to deal with reswizzling logic in modifier prop. Move
|
||||||
|
* the swizzle outside, it's easier for clamp propagation. */
|
||||||
|
case BI_OPCODE_FCLAMP_V2F16:
|
||||||
|
{
|
||||||
|
bi_builder b = bi_init_builder(ctx, bi_after_instr(ins));
|
||||||
|
bi_index dest = ins->dest[0];
|
||||||
|
bi_index tmp = bi_temp(ctx);
|
||||||
|
|
||||||
|
ins->dest[0] = tmp;
|
||||||
|
bi_swz_v2i16_to(&b, dest, bi_replace_index(ins->src[0], tmp));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Identity is ok (TODO: what about replicate only?) */
|
|
||||||
if (ins->src[src].swizzle == BI_SWIZZLE_H01)
|
|
||||||
return;
|
|
||||||
|
|
||||||
/* If the instruction is scalar we can ignore the other component */
|
/* If the instruction is scalar we can ignore the other component */
|
||||||
if (ins->dest[0].swizzle == BI_SWIZZLE_H00 &&
|
if (ins->dest[0].swizzle == BI_SWIZZLE_H00 &&
|
||||||
ins->src[src].swizzle == BI_SWIZZLE_H00)
|
ins->src[src].swizzle == BI_SWIZZLE_H00)
|
||||||
|
@@ -157,19 +157,16 @@ bi_takes_clamp(bi_instr *I)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
bi_is_fclamp(bi_instr *I)
|
bi_is_fclamp(enum bi_opcode op, enum bi_size size)
|
||||||
{
|
{
|
||||||
return (I->op == BI_OPCODE_FADD_F32 || I->op == BI_OPCODE_FADD_V2F16) &&
|
return (size == BI_SIZE_32 && op == BI_OPCODE_FCLAMP_F32) ||
|
||||||
(!I->src[0].abs && !I->src[0].neg) &&
|
(size == BI_SIZE_16 && op == BI_OPCODE_FCLAMP_V2F16);
|
||||||
(I->src[1].type == BI_INDEX_CONSTANT && I->src[1].value == 0) &&
|
|
||||||
(I->clamp != BI_CLAMP_NONE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
bi_optimizer_clamp(bi_instr *I, bi_instr *use)
|
bi_optimizer_clamp(bi_instr *I, bi_instr *use)
|
||||||
{
|
{
|
||||||
if (bi_opcode_props[use->op].size != bi_opcode_props[I->op].size) return false;
|
if (!bi_is_fclamp(use->op, bi_opcode_props[I->op].size)) return false;
|
||||||
if (!bi_is_fclamp(use)) return false;
|
|
||||||
if (!bi_takes_clamp(I)) return false;
|
if (!bi_takes_clamp(I)) return false;
|
||||||
|
|
||||||
/* Clamps are bitfields (clamp_m1_1/clamp_0_inf) so composition is OR */
|
/* Clamps are bitfields (clamp_m1_1/clamp_0_inf) so composition is OR */
|
||||||
@@ -260,6 +257,8 @@ bi_lower_opt_instruction(bi_instr *I)
|
|||||||
switch (I->op) {
|
switch (I->op) {
|
||||||
case BI_OPCODE_FABSNEG_F32:
|
case BI_OPCODE_FABSNEG_F32:
|
||||||
case BI_OPCODE_FABSNEG_V2F16:
|
case BI_OPCODE_FABSNEG_V2F16:
|
||||||
|
case BI_OPCODE_FCLAMP_F32:
|
||||||
|
case BI_OPCODE_FCLAMP_V2F16:
|
||||||
I->op = (bi_opcode_props[I->op].size == BI_SIZE_32) ?
|
I->op = (bi_opcode_props[I->op].size == BI_SIZE_32) ?
|
||||||
BI_OPCODE_FADD_F32 : BI_OPCODE_FADD_V2F16;
|
BI_OPCODE_FADD_F32 : BI_OPCODE_FADD_V2F16;
|
||||||
|
|
||||||
|
@@ -1862,19 +1862,19 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case nir_op_fsat: {
|
case nir_op_fsat: {
|
||||||
bi_instr *I = bi_fadd_to(b, sz, dst, s0, bi_negzero(), BI_ROUND_NONE);
|
bi_instr *I = bi_fclamp_to(b, sz, dst, s0);
|
||||||
I->clamp = BI_CLAMP_CLAMP_0_1;
|
I->clamp = BI_CLAMP_CLAMP_0_1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case nir_op_fsat_signed_mali: {
|
case nir_op_fsat_signed_mali: {
|
||||||
bi_instr *I = bi_fadd_to(b, sz, dst, s0, bi_negzero(), BI_ROUND_NONE);
|
bi_instr *I = bi_fclamp_to(b, sz, dst, s0);
|
||||||
I->clamp = BI_CLAMP_CLAMP_M1_1;
|
I->clamp = BI_CLAMP_CLAMP_M1_1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case nir_op_fclamp_pos_mali: {
|
case nir_op_fclamp_pos_mali: {
|
||||||
bi_instr *I = bi_fadd_to(b, sz, dst, s0, bi_negzero(), BI_ROUND_NONE);
|
bi_instr *I = bi_fclamp_to(b, sz, dst, s0);
|
||||||
I->clamp = BI_CLAMP_CLAMP_0_INF;
|
I->clamp = BI_CLAMP_CLAMP_0_INF;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user