glsl: Remove bitfield_extract and bitfield_insert lowering

As far as I can tell, every driver that supports GLSL 1.30 or
GL_EXT_gpu_shader4 (and therefore also enables support for
GL_MESA_shader_integer_functions) also sets some subset of the various
NIR lower_bitfield_extract and lower_bitfield_insert flags.

v2: Declaration of 'result' still needs to be added to the IR. Noticed
by marge.

v3: Fix 'git rebase --autosquash' putting the v2 fix in the wrong
place. I've never seen that happen before. :(

Reviewed-by: Emma Anholt <emma@anholt.net> [v1]
Reviewed-by: Matt Turner <mattst88@gmail.com> [v1]
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20323>
This commit is contained in:
Ian Romanick
2022-12-13 10:36:18 -08:00
parent db241fbd70
commit f5722c4973

View File

@@ -61,8 +61,6 @@
#define DOPS_TO_DFRAC 0x800
#define DFREXP_DLDEXP_TO_ARITH 0x1000
#define BIT_COUNT_TO_MATH 0x02000
#define EXTRACT_TO_SHIFTS 0x04000
#define INSERT_TO_SHIFTS 0x08000
#define REVERSE_TO_SHIFTS 0x10000
#define FIND_LSB_TO_FLOAT_CAST 0x20000
#define FIND_MSB_TO_FLOAT_CAST 0x40000
@@ -97,8 +95,6 @@ private:
void dtrunc_to_dfrac(ir_expression *);
void dsign_to_csel(ir_expression *);
void bit_count_to_math(ir_expression *);
void extract_to_shifts(ir_expression *);
void insert_to_shifts(ir_expression *);
void reverse_to_shifts(ir_expression *ir);
void find_lsb_to_float_cast(ir_expression *ir);
void find_msb_to_float_cast(ir_expression *ir);
@@ -135,8 +131,6 @@ lower_instructions(exec_list *instructions, bool have_ldexp, bool have_dfrexp,
* some caps for individual instructions.
*/
(!have_gpu_shader5 ? BIT_COUNT_TO_MATH |
EXTRACT_TO_SHIFTS |
INSERT_TO_SHIFTS |
REVERSE_TO_SHIFTS |
FIND_LSB_TO_FLOAT_CAST |
FIND_MSB_TO_FLOAT_CAST |
@@ -295,22 +289,12 @@ lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)
new(ir) ir_constant(0x80000000u, vec_elem)),
sign_mantissa)));
/* Don't generate new IR that would need to be lowered in an additional
* pass.
*/
i.insert_before(result);
if (!lowering(INSERT_TO_SHIFTS)) {
i.insert_before(assign(result,
bitfield_insert(sign_mantissa,
i2u(resulting_biased_exp),
new(ir) ir_constant(23u, vec_elem),
new(ir) ir_constant(8u, vec_elem))));
} else {
i.insert_before(assign(result,
bit_or(sign_mantissa,
lshift(i2u(resulting_biased_exp),
new(ir) ir_constant(23, vec_elem)))));
}
i.insert_before(assign(result,
bitfield_insert(sign_mantissa,
i2u(resulting_biased_exp),
new(ir) ir_constant(23u, vec_elem),
new(ir) ir_constant(8u, vec_elem))));
ir->operation = ir_triop_csel;
ir->init_num_operands();
@@ -825,149 +809,6 @@ lower_instructions_visitor::bit_count_to_math(ir_expression *ir)
this->progress = true;
}
void
lower_instructions_visitor::extract_to_shifts(ir_expression *ir)
{
ir_variable *bits =
new(ir) ir_variable(ir->operands[0]->type, "bits", ir_var_temporary);
base_ir->insert_before(bits);
base_ir->insert_before(assign(bits, ir->operands[2]));
if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) {
ir_constant *c1 =
new(ir) ir_constant(1u, ir->operands[0]->type->vector_elements);
ir_constant *c32 =
new(ir) ir_constant(32u, ir->operands[0]->type->vector_elements);
ir_constant *cFFFFFFFF =
new(ir) ir_constant(0xFFFFFFFFu, ir->operands[0]->type->vector_elements);
/* At least some hardware treats (x << y) as (x << (y%32)). This means
* we'd get a mask of 0 when bits is 32. Special case it.
*
* mask = bits == 32 ? 0xffffffff : (1u << bits) - 1u;
*/
ir_expression *mask = csel(equal(bits, c32),
cFFFFFFFF,
sub(lshift(c1, bits), c1->clone(ir, NULL)));
/* Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
*
* If bits is zero, the result will be zero.
*
* Since (1 << 0) - 1 == 0, we don't need to bother with the conditional
* select as in the signed integer case.
*
* (value >> offset) & mask;
*/
ir->operation = ir_binop_bit_and;
ir->init_num_operands();
ir->operands[0] = rshift(ir->operands[0], ir->operands[1]);
ir->operands[1] = mask;
ir->operands[2] = NULL;
} else {
ir_constant *c0 =
new(ir) ir_constant(int(0), ir->operands[0]->type->vector_elements);
ir_constant *c32 =
new(ir) ir_constant(int(32), ir->operands[0]->type->vector_elements);
ir_variable *temp =
new(ir) ir_variable(ir->operands[0]->type, "temp", ir_var_temporary);
/* temp = 32 - bits; */
base_ir->insert_before(temp);
base_ir->insert_before(assign(temp, sub(c32, bits)));
/* expr = value << (temp - offset)) >> temp; */
ir_expression *expr =
rshift(lshift(ir->operands[0], sub(temp, ir->operands[1])), temp);
/* Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
*
* If bits is zero, the result will be zero.
*
* Due to the (x << (y%32)) behavior mentioned before, the (value <<
* (32-0)) doesn't "erase" all of the data as we would like, so finish
* up with:
*
* (bits == 0) ? 0 : e;
*/
ir->operation = ir_triop_csel;
ir->init_num_operands();
ir->operands[0] = equal(c0, bits);
ir->operands[1] = c0->clone(ir, NULL);
ir->operands[2] = expr;
}
this->progress = true;
}
void
lower_instructions_visitor::insert_to_shifts(ir_expression *ir)
{
ir_constant *c1;
ir_constant *c32;
ir_constant *cFFFFFFFF;
ir_variable *offset =
new(ir) ir_variable(ir->operands[0]->type, "offset", ir_var_temporary);
ir_variable *bits =
new(ir) ir_variable(ir->operands[0]->type, "bits", ir_var_temporary);
ir_variable *mask =
new(ir) ir_variable(ir->operands[0]->type, "mask", ir_var_temporary);
if (ir->operands[0]->type->base_type == GLSL_TYPE_INT) {
c1 = new(ir) ir_constant(int(1), ir->operands[0]->type->vector_elements);
c32 = new(ir) ir_constant(int(32), ir->operands[0]->type->vector_elements);
cFFFFFFFF = new(ir) ir_constant(int(0xFFFFFFFF), ir->operands[0]->type->vector_elements);
} else {
assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
c1 = new(ir) ir_constant(1u, ir->operands[0]->type->vector_elements);
c32 = new(ir) ir_constant(32u, ir->operands[0]->type->vector_elements);
cFFFFFFFF = new(ir) ir_constant(0xFFFFFFFFu, ir->operands[0]->type->vector_elements);
}
base_ir->insert_before(offset);
base_ir->insert_before(assign(offset, ir->operands[2]));
base_ir->insert_before(bits);
base_ir->insert_before(assign(bits, ir->operands[3]));
/* At least some hardware treats (x << y) as (x << (y%32)). This means
* we'd get a mask of 0 when bits is 32. Special case it.
*
* mask = (bits == 32 ? 0xffffffff : (1u << bits) - 1u) << offset;
*
* Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
*
* The result will be undefined if offset or bits is negative, or if the
* sum of offset and bits is greater than the number of bits used to
* store the operand.
*
* Since it's undefined, there are a couple other ways this could be
* implemented. The other way that was considered was to put the csel
* around the whole thing:
*
* final_result = bits == 32 ? insert : ... ;
*/
base_ir->insert_before(mask);
base_ir->insert_before(assign(mask, csel(equal(bits, c32),
cFFFFFFFF,
lshift(sub(lshift(c1, bits),
c1->clone(ir, NULL)),
offset))));
/* (base & ~mask) | ((insert << offset) & mask) */
ir->operation = ir_binop_bit_or;
ir->init_num_operands();
ir->operands[0] = bit_and(ir->operands[0], bit_not(mask));
ir->operands[1] = bit_and(lshift(ir->operands[1], offset), mask);
ir->operands[2] = NULL;
ir->operands[3] = NULL;
this->progress = true;
}
void
lower_instructions_visitor::reverse_to_shifts(ir_expression *ir)
{
@@ -1457,16 +1298,6 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
bit_count_to_math(ir);
break;
case ir_triop_bitfield_extract:
if (lowering(EXTRACT_TO_SHIFTS))
extract_to_shifts(ir);
break;
case ir_quadop_bitfield_insert:
if (lowering(INSERT_TO_SHIFTS))
insert_to_shifts(ir);
break;
case ir_unop_bitfield_reverse:
if (lowering(REVERSE_TO_SHIFTS))
reverse_to_shifts(ir);