glsl: Add lowering pass for ir_unop_find_msb

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
Ian Romanick
2016-06-24 01:53:33 -07:00
parent 2a381a3c73
commit 1b5477668a
2 changed files with 107 additions and 0 deletions

View File

@@ -47,6 +47,7 @@
#define INSERT_TO_SHIFTS 0x08000 #define INSERT_TO_SHIFTS 0x08000
#define REVERSE_TO_SHIFTS 0x10000 #define REVERSE_TO_SHIFTS 0x10000
#define FIND_LSB_TO_FLOAT_CAST 0x20000 #define FIND_LSB_TO_FLOAT_CAST 0x20000
/* Lowering flag: when set, ir_unop_find_msb expressions are rewritten by
 * find_msb_to_float_cast().
 */
#define FIND_MSB_TO_FLOAT_CAST 0x40000
/** /**
* \see class lower_packing_builtins_visitor * \see class lower_packing_builtins_visitor

View File

@@ -164,6 +164,7 @@ private:
void insert_to_shifts(ir_expression *); void insert_to_shifts(ir_expression *);
void reverse_to_shifts(ir_expression *ir); void reverse_to_shifts(ir_expression *ir);
void find_lsb_to_float_cast(ir_expression *ir); void find_lsb_to_float_cast(ir_expression *ir);
void find_msb_to_float_cast(ir_expression *ir);
}; };
} /* anonymous namespace */ } /* anonymous namespace */
@@ -1311,6 +1312,106 @@ lower_instructions_visitor::find_lsb_to_float_cast(ir_expression *ir)
this->progress = true; this->progress = true;
} }
void
lower_instructions_visitor::find_msb_to_float_cast(ir_expression *ir)
{
   /* Rewrite an ir_unop_find_msb expression as a sequence that converts the
    * operand to float and reads the index of the highest set bit out of the
    * float's exponent.  The expression itself is turned into the final
    * select, and all supporting instructions are inserted before base_ir.
    *
    * For more details, see:
    *
    * http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightFloatCast
    */
   const unsigned width = ir->operands[0]->type->vector_elements;

   ir_variable *bits =
      new(ir) ir_variable(glsl_type::uvec(width), "temp", ir_var_temporary);
   ir_variable *float_bits =
      new(ir) ir_variable(glsl_type::vec(width), "as_float", ir_var_temporary);
   ir_variable *exponent =
      new(ir) ir_variable(glsl_type::ivec(width), "msb", ir_var_temporary);

   base_ir->insert_before(bits);

   if (ir->operands[0]->type->base_type != GLSL_TYPE_UINT) {
      assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);

      /* findMSB(uint(abs(some_int))) is right for most signed inputs, but
       * it fails for two values:
       *
       * * 0x80000000.  abs(0x80000000) == 0x80000000, so the result would
       *   be 31, whereas findMSB(int(0x80000000)) must be 30.
       *
       * * 0xffffffff.  abs(0xffffffff) == 1, so the result would be 0.
       *   Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
       *
       *    For a value of zero or negative one, -1 will be returned.
       *
       * Every negative input, including those two, gives the correct
       * answer when the value is bitwise inverted instead of negated.
       * XOR-ing the value with its own sign bit replicated across the word
       * (value >> 31) inverts negative values and leaves non-negative ones
       * untouched, which makes this a two-instruction conditional
       * logical-not.
       */
      ir_variable *signed_src =
         new(ir) ir_variable(glsl_type::ivec(width), "as_int", ir_var_temporary);
      ir_constant *sign_shift = new(ir) ir_constant(int(31), width);

      base_ir->insert_before(signed_src);
      base_ir->insert_before(assign(signed_src, ir->operands[0]));
      base_ir->insert_before(assign(bits, i2u(expr(ir_binop_bit_xor,
                                                   signed_src,
                                                   rshift(signed_src,
                                                          sign_shift)))));
   } else {
      /* Unsigned input can be used directly. */
      base_ir->insert_before(assign(bits, ir->operands[0]));
   }

   /* Values above 255 have their low 8 bits cleared first, so whatever is
    * converted carries at most 24 significant bits (or is zero) and the
    * conversion to float is exact.  The zero result is never used.  The
    * conversion is done from the unsigned value so that 0x80000000 does not
    * become a negative float.
    *
    *    float as_float = float(temp > 255 ? temp & ~255 : temp);
    */
   ir_constant *low_byte_max = new(ir) ir_constant(0x000000FFu, width);
   ir_constant *high_bits_mask = new(ir) ir_constant(0xFFFFFF00u, width);

   base_ir->insert_before(float_bits);
   base_ir->insert_before(assign(float_bits,
                                 u2f(csel(greater(bits, low_byte_max),
                                          bit_and(bits, high_bits_mask),
                                          bits))));

   /* What follows is effectively a hand-written frexp; hardware with a
    * native frexp would be better off using it.  Two shortcuts are taken
    * relative to a general open-coded version:
    *
    * - Results for subnormal inputs (0.0 included) do not matter, so the
    *   bias can be subtracted from the raw exponent unconditionally.
    *
    * - The float is never negative, so the sign bit does not have to be
    *   masked away before the exponent is extracted.
    *
    *    int msb = (floatBitsToInt(as_float) >> 23) - 0x7f;
    */
   ir_constant *mantissa_bits = new(ir) ir_constant(int(23), width);
   ir_constant *exponent_bias = new(ir) ir_constant(int(0x7F), width);

   base_ir->insert_before(exponent);
   base_ir->insert_before(assign(exponent,
                                 sub(rshift(bitcast_f2i(float_bits),
                                            mantissa_bits),
                                     exponent_bias)));

   /* The final select tests msb rather than temp so that the subtraction
    * above may be able to supply the condition without a separate compare.
    *
    *    (msb < 0) ? -1 : msb;
    *
    * Because the inputs are integers, the unbiased exponent is negative
    * (-0x7f, specifically) only when temp is 0.
    */
   ir_constant *zero = new(ir) ir_constant(int(0), width);
   ir_constant *not_found = new(ir) ir_constant(int(-1), width);

   ir->operation = ir_triop_csel;
   ir->operands[0] = less(exponent, zero);
   ir->operands[1] = not_found;
   ir->operands[2] = new(ir) ir_dereference_variable(exponent);

   this->progress = true;
}
ir_visitor_status ir_visitor_status
lower_instructions_visitor::visit_leave(ir_expression *ir) lower_instructions_visitor::visit_leave(ir_expression *ir)
{ {
@@ -1438,6 +1539,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
find_lsb_to_float_cast(ir); find_lsb_to_float_cast(ir);
break; break;
case ir_unop_find_msb:
if (lowering(FIND_MSB_TO_FLOAT_CAST))
find_msb_to_float_cast(ir);
break;
default: default:
return visit_continue; return visit_continue;
} }