Files
third_party_mesa3d/src/compiler/glsl/lower_precision.cpp
Timothy Arceri 87940c3193 glsl: dont lower precision for textureGatherOffsets
textureGatherOffsets always takes a highp array of constants. As
per the discussion in [1] trying to lower the precision results in segfault
later on in the compiler as textureGatherOffsets will end up being passed
a temp when it's expecting a constant as required by the spec.

[1] https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16547#note_1393704

Fixes: b83f4b9fa2 ("glsl: Add an IR lowering pass to convert mediump operations to 16-bit")

Acked-by: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18101>
2022-08-18 23:45:04 +00:00

1377 lines
44 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright © 2019 Google, Inc
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* \file lower_precision.cpp
*/
#include "main/macros.h"
#include "main/consts_exts.h"
#include "compiler/glsl_types.h"
#include "ir.h"
#include "ir_builder.h"
#include "ir_optimization.h"
#include "ir_rvalue_visitor.h"
#include "util/half_float.h"
#include "util/set.h"
#include "util/hash_table.h"
#include <vector>
namespace {
/* Top-level pass driver: finds lowerable rvalues (via
 * find_lowerable_rvalues_visitor), rewrites them to 16 bits, and swaps
 * builtin calls for lazily-created lowered clones.
 */
class find_precision_visitor : public ir_rvalue_enter_visitor {
public:
   find_precision_visitor(const struct gl_shader_compiler_options *options);
   ~find_precision_visitor();

   virtual void handle_rvalue(ir_rvalue **rvalue);
   virtual ir_visitor_status visit_enter(ir_call *ir);

   /* Returns (creating and caching on first use) a lowered clone of a
    * builtin signature.
    */
   ir_function_signature *map_builtin(ir_function_signature *sig);

   /* Set of rvalues that can be lowered. This will be filled in by
    * find_lowerable_rvalues_visitor. Only the root node of a lowerable section
    * will be added to this set.
    */
   struct set *lowerable_rvalues;

   /**
    * A mapping of builtin signature functions to lowered versions. This is
    * filled in lazily when a lowered version is needed.
    */
   struct hash_table *lowered_builtins;

   /**
    * A temporary hash table only used in order to clone functions.
    */
   struct hash_table *clone_ht;

   /* ralloc context owning the lowered builtin clones. */
   void *lowered_builtin_mem_ctx;

   const struct gl_shader_compiler_options *options;
};
/* Analysis pass: walks the IR and decides, per instruction, whether its
 * result may be converted to 16 bits.  State is tracked on an explicit
 * stack that mirrors the hierarchical visitor's traversal.
 */
class find_lowerable_rvalues_visitor : public ir_hierarchical_visitor {
public:
   enum can_lower_state {
      /* Not decided yet; may be resolved by parent or child nodes. */
      UNKNOWN,
      CANT_LOWER,
      SHOULD_LOWER,
   };

   enum parent_relation {
      /* The parent performs a further operation involving the result from the
       * child and can be lowered along with it.
       */
      COMBINED_OPERATION,
      /* The parent instruction's operation is independent of the child type so
       * the child should be lowered separately.
       */
      INDEPENDENT_OPERATION,
   };

   struct stack_entry {
      ir_instruction *instr;
      enum can_lower_state state;
      /* List of child rvalues that can be lowered. When this stack entry is
       * popped, if this node itself can't be lowered then all of the children
       * are root nodes to lower so we will add them to lowerable_rvalues.
       * Otherwise if this node can also be lowered then we won't add the
       * children because we only want to add the topmost lowerable nodes to
       * lowerable_rvalues and the children will be lowered as part of lowering
       * this node.
       */
      std::vector<ir_instruction *> lowerable_children;
   };

   find_lowerable_rvalues_visitor(struct set *result,
                                  const struct gl_shader_compiler_options *options);

   /* Hierarchical-visitor hooks used to push/pop stack entries around every
    * visited instruction.
    */
   static void stack_enter(class ir_instruction *ir, void *data);
   static void stack_leave(class ir_instruction *ir, void *data);

   virtual ir_visitor_status visit(ir_constant *ir);
   virtual ir_visitor_status visit(ir_dereference_variable *ir);

   virtual ir_visitor_status visit_enter(ir_dereference_record *ir);
   virtual ir_visitor_status visit_enter(ir_dereference_array *ir);
   virtual ir_visitor_status visit_enter(ir_texture *ir);
   virtual ir_visitor_status visit_enter(ir_expression *ir);

   virtual ir_visitor_status visit_leave(ir_assignment *ir);
   virtual ir_visitor_status visit_leave(ir_call *ir);

   /* Maps a type plus a precision qualifier to a lowering decision. */
   can_lower_state handle_precision(const glsl_type *type,
                                    int precision) const;

   static parent_relation get_parent_relation(ir_instruction *parent,
                                              ir_instruction *child);

   /* Traversal stack; one entry per instruction currently being visited. */
   std::vector<stack_entry> stack;
   /* Output set of root lowerable rvalues (owned by the caller). */
   struct set *lowerable_rvalues;
   const struct gl_shader_compiler_options *options;

   void pop_stack_entry();
   void add_lowerable_children(const stack_entry &entry);
};
/* Rewrite pass: retypes a single lowerable rvalue tree to 16 bits in place.
 * It deliberately stops at dereferences, calls, and textures, which are
 * handled separately by find_precision_visitor.
 */
class lower_precision_visitor : public ir_rvalue_visitor {
public:
   virtual void handle_rvalue(ir_rvalue **rvalue);
   virtual ir_visitor_status visit_enter(ir_dereference_array *);
   virtual ir_visitor_status visit_enter(ir_dereference_record *);
   virtual ir_visitor_status visit_enter(ir_call *ir);
   virtual ir_visitor_status visit_enter(ir_texture *ir);
   virtual ir_visitor_status visit_leave(ir_expression *);
};
/* Returns whether a value of the given type may participate in precision
 * lowering at all.
 *
 * Only float, int/uint, bool, and sampler/image types qualify.  Excluding
 * everything else stops type-changing operations (e.g. conversions to int)
 * from being lowered as a whole: only their arguments get lowered, with a
 * final conversion back to float32 added afterwards.  Booleans are allowed
 * so that comparisons can be done at 16 bits.
 */
static bool
can_lower_type(const struct gl_shader_compiler_options *options,
               const glsl_type *type)
{
   const auto base = type->without_array()->base_type;

   /* TODO: should we do anything for these with regard to Int16 vs FP16
    * support?
    */
   if (base == GLSL_TYPE_BOOL ||
       base == GLSL_TYPE_SAMPLER ||
       base == GLSL_TYPE_IMAGE)
      return true;

   if (base == GLSL_TYPE_FLOAT)
      return options->LowerPrecisionFloat16;

   if (base == GLSL_TYPE_UINT || base == GLSL_TYPE_INT)
      return options->LowerPrecisionInt16;

   return false;
}
find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set *res,
                               const struct gl_shader_compiler_options *opts)
{
   options = opts;
   lowerable_rvalues = res;

   /* Route the hierarchical visitor's per-node enter/leave hooks to our
    * stack bookkeeping; the static callbacks recover 'this' from the data
    * pointers.
    */
   data_enter = this;
   data_leave = this;
   callback_enter = stack_enter;
   callback_leave = stack_leave;
}
void
find_lowerable_rvalues_visitor::stack_enter(class ir_instruction *ir,
                                            void *data)
{
   auto *visitor = (find_lowerable_rvalues_visitor *) data;

   /* Push a fresh entry for this instruction.  Anything visited while in an
    * assignee (LHS) context must never be lowered, so it starts out as
    * CANT_LOWER; otherwise the decision is still open.
    */
   stack_entry entry;
   entry.instr = ir;
   entry.state = visitor->in_assignee ? CANT_LOWER : UNKNOWN;

   visitor->stack.push_back(entry);
}
void
find_lowerable_rvalues_visitor::add_lowerable_children(const stack_entry &entry)
{
/* We cant lower this node so if there were any pending children then they
* are all root lowerable nodes and we should add them to the set.
*/
for (auto &it : entry.lowerable_children)
_mesa_set_add(lowerable_rvalues, it);
}
/* Pops the entry for the instruction we just finished visiting, folding its
 * lowering decision into its parent and/or emitting root lowerable nodes
 * into lowerable_rvalues.
 */
void
find_lowerable_rvalues_visitor::pop_stack_entry()
{
   const stack_entry &entry = stack.back();

   if (stack.size() >= 2) {
      /* Combine this state into the parent state, unless the parent operation
       * doesn't have any relation to the child operations
       */
      stack_entry &parent = stack.end()[-2];
      parent_relation rel = get_parent_relation(parent.instr, entry.instr);

      if (rel == COMBINED_OPERATION) {
         switch (entry.state) {
         case CANT_LOWER:
            /* An unlowerable child poisons the combined parent expression. */
            parent.state = CANT_LOWER;
            break;
         case SHOULD_LOWER:
            if (parent.state == UNKNOWN)
               parent.state = SHOULD_LOWER;
            break;
         case UNKNOWN:
            break;
         }
      }
   }

   if (entry.state == SHOULD_LOWER) {
      ir_rvalue *rv = entry.instr->as_rvalue();

      if (rv == NULL) {
         add_lowerable_children(entry);
      } else if (stack.size() >= 2) {
         stack_entry &parent = stack.end()[-2];

         switch (get_parent_relation(parent.instr, rv)) {
         case COMBINED_OPERATION:
            /* We only want to add the toplevel lowerable instructions to the
             * lowerable set. Therefore if there is a parent then instead of
             * adding this instruction to the set we will queue it depending on
             * the result of the parent instruction.
             */
            parent.lowerable_children.push_back(entry.instr);
            break;
         case INDEPENDENT_OPERATION:
            _mesa_set_add(lowerable_rvalues, rv);
            break;
         }
      } else {
         /* This is a toplevel node so add it directly to the lowerable
          * set.
          */
         _mesa_set_add(lowerable_rvalues, rv);
      }
   } else if (entry.state == CANT_LOWER) {
      add_lowerable_children(entry);
   }

   stack.pop_back();
}
void
find_lowerable_rvalues_visitor::stack_leave(class ir_instruction *ir,
void *data)
{
find_lowerable_rvalues_visitor *state =
(find_lowerable_rvalues_visitor *) data;
state->pop_stack_entry();
}
/* Maps a type + declared precision to a lowering decision: mediump/lowp on
 * a lowerable type means lower, highp means keep, and no qualifier leaves
 * the decision to the surrounding expression.
 */
enum find_lowerable_rvalues_visitor::can_lower_state
find_lowerable_rvalues_visitor::handle_precision(const glsl_type *type,
                                                 int precision) const
{
   if (!can_lower_type(options, type))
      return CANT_LOWER;

   switch (precision) {
   case GLSL_PRECISION_NONE:
      return UNKNOWN;
   case GLSL_PRECISION_HIGH:
      return CANT_LOWER;
   case GLSL_PRECISION_MEDIUM:
   case GLSL_PRECISION_LOW:
      return SHOULD_LOWER;
   }

   /* Unexpected qualifier value: be conservative. */
   return CANT_LOWER;
}
enum find_lowerable_rvalues_visitor::parent_relation
find_lowerable_rvalues_visitor::get_parent_relation(ir_instruction *parent,
                                                    ir_instruction *child)
{
   /* Children of a dereference (for example an array index) are lowered on
    * their own, independently of the dereference itself.
    *
    * Likewise for textures: the precision of the sample result follows the
    * sampler's precision, and the other arguments don't affect it, so they
    * are treated as independent too.
    */
   const bool independent = parent->as_dereference() || parent->as_texture();

   return independent ? INDEPENDENT_OPERATION : COMBINED_OPERATION;
}
ir_visitor_status
find_lowerable_rvalues_visitor::visit(ir_constant *ir)
{
   /* Leaf node: push an entry, veto it if the constant's type can't be
    * represented at 16 bits, then pop immediately so the state folds into
    * the parent.
    */
   stack_enter(ir, this);

   if (!can_lower_type(options, ir->type))
      stack.back().state = CANT_LOWER;

   stack_leave(ir, this);

   return visit_continue;
}
ir_visitor_status
find_lowerable_rvalues_visitor::visit(ir_dereference_variable *ir)
{
   /* Leaf node: the decision comes from the variable's declared precision,
    * unless an assignee context already forced CANT_LOWER at stack_enter.
    */
   stack_enter(ir, this);

   stack_entry &top = stack.back();
   if (top.state == UNKNOWN)
      top.state = handle_precision(ir->type, ir->precision());

   stack_leave(ir, this);

   return visit_continue;
}
ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_dereference_record *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   /* Decide from the record member's precision unless the state was already
    * forced (e.g. by an assignee context).
    */
   stack_entry &top = stack.back();
   if (top.state == UNKNOWN)
      top.state = handle_precision(ir->type, ir->precision());

   return visit_continue;
}
ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_dereference_array *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   /* Decide from the element's precision unless the state was already
    * forced (e.g. by an assignee context).
    */
   stack_entry &top = stack.back();
   if (top.state == UNKNOWN)
      top.state = handle_precision(ir->type, ir->precision());

   return visit_continue;
}
ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_texture *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   /* A texture fetch inherits its precision from the sampler, so the
    * sampler's precision alone decides lowerability here — unconditionally
    * overriding whatever state the entry started with.
    */
   stack.back().state = handle_precision(ir->type, ir->sampler->precision());

   return visit_continue;
}
ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_expression *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (!can_lower_type(options, ir->type))
      stack.back().state = CANT_LOWER;

   /* Derivative opcodes keep full precision unless the driver explicitly
    * opts in via LowerPrecisionDerivatives.
    */
   if (!options->LowerPrecisionDerivatives) {
      switch (ir->operation) {
      case ir_unop_dFdx:
      case ir_unop_dFdx_coarse:
      case ir_unop_dFdx_fine:
      case ir_unop_dFdy:
      case ir_unop_dFdy_coarse:
      case ir_unop_dFdy_fine:
         stack.back().state = CANT_LOWER;
         break;
      default:
         break;
      }
   }

   return visit_continue;
}
/* Returns whether a builtin's GLSL signature declares a mediump or lowp
 * return value regardless of the precision of its arguments.
 */
static bool
function_always_returns_mediump_or_lowp(const char *name)
{
   static const char *const lowp_return_builtins[] = {
      "bitCount",
      "findLSB",
      "findMSB",
      "unpackHalf2x16",
      "unpackUnorm4x8",
      "unpackSnorm4x8",
   };

   for (unsigned i = 0;
        i < sizeof(lowp_return_builtins) / sizeof(lowp_return_builtins[0]);
        i++) {
      if (strcmp(name, lowp_return_builtins[i]) == 0)
         return true;
   }

   return false;
}
/* Determines the effective return precision of a call.  Builtins generally
 * have no declared return precision, so it is inferred from the arguments,
 * with a table of special cases mandated by the GLSL ES spec.
 */
static unsigned
handle_call(ir_call *ir, const struct set *lowerable_rvalues)
{
   /* The intrinsic call is inside the wrapper imageLoad function that will
    * be inlined. We have to handle both of them.
    */
   if (ir->callee->intrinsic_id == ir_intrinsic_image_load ||
       (ir->callee->is_builtin() &&
        !strcmp(ir->callee_name(), "imageLoad"))) {
      ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
      ir_variable *resource = param->variable_referenced();

      assert(ir->callee->return_precision == GLSL_PRECISION_NONE);
      assert(resource->type->without_array()->is_image());

      /* GLSL ES 3.20 requires that images have a precision modifier, but if
       * you set one, it doesn't do anything, because all intrinsics are
       * defined with highp. This seems to be a spec bug.
       *
       * In theory we could set the return value to mediump if the image
       * format has a lower precision. This appears to be the most sensible
       * thing to do.
       */
      const struct util_format_description *desc =
         util_format_description(resource->data.image_format);
      int i =
         util_format_get_first_non_void_channel(resource->data.image_format);
      bool mediump;

      assert(i >= 0);

      /* A channel fits in mediump if it has at most 16 int/float bits, or at
       * most 10 normalized bits (unorm/snorm values are in [0,1]/[-1,1]).
       */
      if (desc->channel[i].pure_integer ||
          desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
         mediump = desc->channel[i].size <= 16;
      else
         mediump = desc->channel[i].size <= 10; /* unorm/snorm */

      return mediump ? GLSL_PRECISION_MEDIUM : GLSL_PRECISION_HIGH;
   }

   /* Return the declared precision for user-defined functions. */
   if (!ir->callee->is_builtin())
      return ir->callee->return_precision;

   /* Handle special calls. */
   if (ir->callee->is_builtin() && ir->actual_parameters.length()) {
      ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
      ir_variable *var = param->variable_referenced();

      /* Handle builtin wrappers around ir_texture opcodes. These wrappers will
       * be inlined by lower_precision() if we return true here, so that we can
       * get to ir_texture later and do proper lowering.
       *
       * We should lower the type of the return value if the sampler type
       * uses lower precision. The function parameters don't matter.
       */
      if (var && var->type->without_array()->is_sampler()) {
         /* textureSize always returns highp. */
         if (!strcmp(ir->callee_name(), "textureSize"))
            return GLSL_PRECISION_HIGH;

         /* textureGatherOffsets always takes a highp array of constants. As
          * per the discussion https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16547#note_1393704
          * trying to lower the precision results in segfault later on
          * in the compiler as textureGatherOffsets will end up being passed
          * a temp when it's expecting a constant as required by the spec.
          */
         if (!strcmp(ir->callee_name(), "textureGatherOffsets"))
            return GLSL_PRECISION_HIGH;

         return var->data.precision;
      }
   }

   if (/* Parameters are always highp: */
       !strcmp(ir->callee_name(), "floatBitsToInt") ||
       !strcmp(ir->callee_name(), "floatBitsToUint") ||
       !strcmp(ir->callee_name(), "intBitsToFloat") ||
       !strcmp(ir->callee_name(), "uintBitsToFloat") ||
       !strcmp(ir->callee_name(), "bitfieldReverse") ||
       !strcmp(ir->callee_name(), "frexp") ||
       !strcmp(ir->callee_name(), "ldexp") ||
       /* Parameters and outputs are always highp: */
       /* TODO: The operations are highp, but carry and borrow outputs are lowp. */
       !strcmp(ir->callee_name(), "uaddCarry") ||
       !strcmp(ir->callee_name(), "usubBorrow") ||
       !strcmp(ir->callee_name(), "imulExtended") ||
       !strcmp(ir->callee_name(), "umulExtended") ||
       !strcmp(ir->callee_name(), "unpackUnorm2x16") ||
       !strcmp(ir->callee_name(), "unpackSnorm2x16") ||
       /* Outputs are highp: */
       !strcmp(ir->callee_name(), "packUnorm2x16") ||
       !strcmp(ir->callee_name(), "packSnorm2x16") ||
       /* Parameters are mediump and outputs are highp. The parameters should
        * be optimized in NIR, not here, e.g:
        * - packHalf2x16 can just be a bitcast from f16vec2 to uint32
        * - Other opcodes don't have to convert parameters to highp if the hw
        *   has f16 versions. Optimize in NIR accordingly.
        */
       !strcmp(ir->callee_name(), "packHalf2x16") ||
       !strcmp(ir->callee_name(), "packUnorm4x8") ||
       !strcmp(ir->callee_name(), "packSnorm4x8") ||
       /* Atomic functions are not lowered. */
       strstr(ir->callee_name(), "atomic") == ir->callee_name())
      return GLSL_PRECISION_HIGH;

   assert(ir->callee->return_precision == GLSL_PRECISION_NONE);

   /* Number of parameters to check if they are lowerable. */
   unsigned check_parameters = ir->actual_parameters.length();

   /* Interpolation functions only consider the precision of the interpolant. */
   /* Bitfield functions ignore the precision of "offset" and "bits". */
   if (!strcmp(ir->callee_name(), "interpolateAtOffset") ||
       !strcmp(ir->callee_name(), "interpolateAtSample") ||
       !strcmp(ir->callee_name(), "bitfieldExtract")) {
      check_parameters = 1;
   } else if (!strcmp(ir->callee_name(), "bitfieldInsert")) {
      check_parameters = 2;
   }

   if (function_always_returns_mediump_or_lowp(ir->callee_name())) {
      /* These only lower the return value. Parameters keep their precision,
       * which is preserved in map_builtin.
       */
      check_parameters = 0;
   }

   /* If the call is to a builtin, then the function won't have a return
    * precision and we should determine it from the precision of the arguments.
    */
   foreach_in_list(ir_rvalue, param, &ir->actual_parameters) {
      if (!check_parameters)
         break;

      if (!param->as_constant() &&
          _mesa_set_search(lowerable_rvalues, param) == NULL)
         return GLSL_PRECISION_HIGH;

      --check_parameters;
   }

   return GLSL_PRECISION_MEDIUM;
}
ir_visitor_status
find_lowerable_rvalues_visitor::visit_leave(ir_call *ir)
{
   ir_hierarchical_visitor::visit_leave(ir);

   /* Special case for handling temporary variables generated by the compiler
    * for function calls. If we assign to one of these using a function call
    * that has a lowerable return type then we can assume the temporary
    * variable should have a medium precision too.
    */

   /* Do nothing if the return type is void. */
   if (!ir->return_deref)
      return visit_continue;

   ir_variable *var = ir->return_deref->variable_referenced();

   assert(var->data.mode == ir_var_temporary);

   unsigned return_precision = handle_call(ir, lowerable_rvalues);

   can_lower_state lower_state =
      handle_precision(var->type, return_precision);

   if (lower_state == SHOULD_LOWER) {
      /* There probably shouldn't be any situations where multiple ir_call
       * instructions write to the same temporary?
       */
      assert(var->data.precision == GLSL_PRECISION_NONE);
      var->data.precision = GLSL_PRECISION_MEDIUM;
   } else {
      /* Otherwise pin the temporary to highp. */
      var->data.precision = GLSL_PRECISION_HIGH;
   }

   return visit_continue;
}
ir_visitor_status
find_lowerable_rvalues_visitor::visit_leave(ir_assignment *ir)
{
   ir_hierarchical_visitor::visit_leave(ir);

   /* Special case for handling temporary variables generated by the compiler.
    * If we assign to one of these using a lowered precision then we can assume
    * the temporary variable should have a medium precision too.
    */
   ir_variable *var = ir->lhs->variable_referenced();

   if (var->data.mode == ir_var_temporary) {
      if (_mesa_set_search(lowerable_rvalues, ir->rhs)) {
         /* Only override the precision if this is the first assignment. For
          * temporaries such as the ones generated for the ?: operator there
          * can be multiple assignments with different precisions. This way we
          * get the highest precision of all of the assignments.
          */
         if (var->data.precision == GLSL_PRECISION_NONE)
            var->data.precision = GLSL_PRECISION_MEDIUM;
      } else if (!ir->rhs->as_constant()) {
         /* A non-lowerable, non-constant RHS forces the temporary to highp. */
         var->data.precision = GLSL_PRECISION_HIGH;
      }
   }

   return visit_continue;
}
void
find_lowerable_rvalues(const struct gl_shader_compiler_options *options,
exec_list *instructions,
struct set *result)
{
find_lowerable_rvalues_visitor v(result, options);
visit_list_elements(&v, instructions);
assert(v.stack.empty());
}
/* Returns the 32-bit variant of a 16-bit type (up == true) or the 16-bit
 * variant of a 32-bit type (up == false).  Arrays are converted recursively;
 * vector/matrix shape and explicit stride are preserved.
 */
static const glsl_type *
convert_type(bool up, const glsl_type *type)
{
   if (type->is_array()) {
      return glsl_type::get_array_instance(convert_type(up, type->fields.array),
                                           type->array_size(),
                                           type->explicit_stride);
   }

   glsl_base_type new_base_type;

   if (up) {
      switch (type->base_type) {
      case GLSL_TYPE_FLOAT16:
         new_base_type = GLSL_TYPE_FLOAT;
         break;
      case GLSL_TYPE_INT16:
         new_base_type = GLSL_TYPE_INT;
         break;
      case GLSL_TYPE_UINT16:
         new_base_type = GLSL_TYPE_UINT;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   } else {
      switch (type->base_type) {
      case GLSL_TYPE_FLOAT:
         new_base_type = GLSL_TYPE_FLOAT16;
         break;
      case GLSL_TYPE_INT:
         new_base_type = GLSL_TYPE_INT16;
         break;
      case GLSL_TYPE_UINT:
         new_base_type = GLSL_TYPE_UINT16;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   }

   return glsl_type::get_instance(new_base_type,
                                  type->vector_elements,
                                  type->matrix_columns,
                                  type->explicit_stride,
                                  type->interface_row_major);
}
/* Maps a 32-bit (possibly array) type to its 16-bit counterpart. */
static const glsl_type *
lower_glsl_type(const glsl_type *type)
{
   return convert_type(/* up */ false, type);
}
/* Wraps an rvalue in the conversion expression that widens it to 32 bits
 * (up == true) or narrows it to 16 bits (up == false).  Narrowing uses the
 * dedicated *2*mp opcodes.
 */
static ir_rvalue *
convert_precision(bool up, ir_rvalue *ir)
{
   unsigned op;

   if (up) {
      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT16:
         op = ir_unop_f162f;
         break;
      case GLSL_TYPE_INT16:
         op = ir_unop_i2i;
         break;
      case GLSL_TYPE_UINT16:
         op = ir_unop_u2u;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   } else {
      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
         op = ir_unop_f2fmp;
         break;
      case GLSL_TYPE_INT:
         op = ir_unop_i2imp;
         break;
      case GLSL_TYPE_UINT:
         op = ir_unop_u2ump;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   }

   const glsl_type *desired_type = convert_type(up, ir->type);
   /* Allocate the new expression out of the same ralloc context as 'ir'. */
   void *mem_ctx = ralloc_parent(ir);
   return new(mem_ctx) ir_expression(op, desired_type, ir, NULL);
}
void
lower_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   ir_rvalue *ir = *rvalue;

   if (ir == NULL)
      return;

   if (ir->as_dereference()) {
      /* Dereferences keep their original type; insert a down-conversion so
       * the surrounding lowered expression sees a 16-bit value.  Booleans
       * need no conversion.
       */
      if (!ir->type->is_boolean())
         *rvalue = convert_precision(false, ir);
   } else if (ir->type->is_32bit()) {
      /* Retype the node in place.  Constants additionally need their data
       * payload re-encoded to match the new 16-bit type.  Note ir->type has
       * already been lowered when the value is converted below.
       */
      ir->type = lower_glsl_type(ir->type);

      ir_constant *const_ir = ir->as_constant();

      if (const_ir) {
         ir_constant_data value;

         if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
               value.f16[i] = _mesa_float_to_half(const_ir->value.f[i]);
         } else if (ir->type->base_type == GLSL_TYPE_INT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
               value.i16[i] = const_ir->value.i[i];
         } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
               value.u16[i] = const_ir->value.u[i];
         } else {
            unreachable("invalid type");
         }

         const_ir->value = value;
      }
   }
}
ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_record *ir)
{
   /* Leave the underlying variable untouched; only the enclosing expression
    * tree is being retyped.
    */
   return visit_continue_with_parent;
}
ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_array *ir)
{
   /* Neither the array index nor the variable gets converted here.  If the
    * index expression is itself lowerable it is handled as its own root.
    */
   return visit_continue_with_parent;
}
ir_visitor_status
lower_precision_visitor::visit_enter(ir_call *ir)
{
   /* Call arguments are not converted by this visitor; they are handled
    * separately.
    */
   return visit_continue_with_parent;
}
ir_visitor_status
lower_precision_visitor::visit_enter(ir_texture *ir)
{
   /* Texture arguments are not converted by this visitor; they are handled
    * separately.
    */
   return visit_continue_with_parent;
}
ir_visitor_status
lower_precision_visitor::visit_leave(ir_expression *ir)
{
   ir_rvalue_visitor::visit_leave(ir);

   /* Conversions between bool and float need their opcode swapped for the
    * f16 variants now that the float side is 16 bits wide.  b2i and i2b
    * already support int16 operands and can stay as they are.
    */
   if (ir->operation == ir_unop_b2f)
      ir->operation = ir_unop_b2f16;
   else if (ir->operation == ir_unop_f2b)
      ir->operation = ir_unop_f162b;

   return visit_continue;
}
void
find_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   /* Checking the precision of rvalue can be lowered first throughout
    * find_lowerable_rvalues_visitor.
    * Once it is found that the precision of rvalue can be lowered, we can
    * add conversions f2fmp, etc. through lower_precision_visitor.
    */
   if (*rvalue == NULL)
      return;

   struct set_entry *entry = _mesa_set_search(lowerable_rvalues, *rvalue);

   if (!entry)
      return;

   /* Remove so the same root is not rewritten twice. */
   _mesa_set_remove(lowerable_rvalues, entry);

   /* If the entire expression is just a variable dereference then trying to
    * lower it will just directly add pointless to and from conversions without
    * any actual operation in-between. Although these will eventually get
    * optimised out, avoiding generating them here also avoids breaking inout
    * parameters to functions.
    */
   if ((*rvalue)->as_dereference())
      return;

   lower_precision_visitor v;

   (*rvalue)->accept(&v);
   v.handle_rvalue(rvalue);

   /* We don't need to add the final conversion if the final type has been
    * converted to bool
    */
   if ((*rvalue)->type->base_type != GLSL_TYPE_BOOL) {
      *rvalue = convert_precision(true, *rvalue);
   }
}
ir_visitor_status
find_precision_visitor::visit_enter(ir_call *ir)
{
   ir_rvalue_enter_visitor::visit_enter(ir);

   ir_variable *return_var =
      ir->return_deref ? ir->return_deref->variable_referenced() : NULL;

   /* Don't do anything for image_load here. We have only changed the return
    * value to mediump/lowp, so that following instructions can use reduced
    * precision.
    *
    * The return value type of the intrinsic itself isn't changed here, but
    * can be changed in NIR if all users use the *2*mp opcode.
    */
   if (ir->callee->intrinsic_id == ir_intrinsic_image_load)
      return visit_continue;

   /* If this is a call to a builtin and the find_lowerable_rvalues_visitor
    * overrode the precision of the temporary return variable, then we can
    * replace the builtin implementation with a lowered version.
    */
   if (!ir->callee->is_builtin() ||
       ir->callee->is_intrinsic() ||
       return_var == NULL ||
       (return_var->data.precision != GLSL_PRECISION_MEDIUM &&
        return_var->data.precision != GLSL_PRECISION_LOW))
      return visit_continue;

   /* Swap in the lowered signature and inline it at the call site; the
    * original call node is then removed.
    */
   ir->callee = map_builtin(ir->callee);
   ir->generate_inline(ir);
   ir->remove();

   return visit_continue_with_parent;
}
ir_function_signature *
find_precision_visitor::map_builtin(ir_function_signature *sig)
{
   /* Lazily create the cache and its ralloc context on first use, otherwise
    * look for an already-lowered clone of this signature.
    */
   if (lowered_builtins == NULL) {
      lowered_builtins = _mesa_pointer_hash_table_create(NULL);
      clone_ht = _mesa_pointer_hash_table_create(NULL);
      lowered_builtin_mem_ctx = ralloc_context(NULL);
   } else {
      struct hash_entry *entry = _mesa_hash_table_search(lowered_builtins, sig);
      if (entry)
         return (ir_function_signature *) entry->data;
   }

   ir_function_signature *lowered_sig =
      sig->clone(lowered_builtin_mem_ctx, clone_ht);

   /* Functions that always return mediump or lowp should keep their
    * parameters intact, because they can be highp. NIR can lower
    * the up-conversion for parameters if needed.
    */
   if (!function_always_returns_mediump_or_lowp(sig->function_name())) {
      foreach_in_list(ir_variable, param, &lowered_sig->parameters) {
         param->data.precision = GLSL_PRECISION_MEDIUM;
      }
   }

   /* Run the whole pass recursively over the cloned body. */
   lower_precision(options, &lowered_sig->body);

   /* clone_ht is only needed during the clone; reset it for the next one. */
   _mesa_hash_table_clear(clone_ht, NULL);

   _mesa_hash_table_insert(lowered_builtins, sig, lowered_sig);

   return lowered_sig;
}
/* The builtin-lowering caches are created lazily in map_builtin, so they
 * start out NULL here.
 */
find_precision_visitor::find_precision_visitor(const struct gl_shader_compiler_options *options)
   : lowerable_rvalues(_mesa_pointer_set_create(NULL)),
     lowered_builtins(NULL),
     clone_ht(NULL),
     lowered_builtin_mem_ctx(NULL),
     options(options)
{
}
find_precision_visitor::~find_precision_visitor()
{
   _mesa_set_destroy(lowerable_rvalues, NULL);

   /* The builtin caches only exist if map_builtin was ever called. */
   if (lowered_builtins) {
      _mesa_hash_table_destroy(lowered_builtins, NULL);
      _mesa_hash_table_destroy(clone_ht, NULL);
      ralloc_free(lowered_builtin_mem_ctx);
   }
}
/* Lowering opcodes to 16 bits is not enough for programs with control flow
 * (and the ?: operator, which is represented by if-then-else in the IR),
 * because temporary variables, which are used for passing values between
 * code blocks, are not lowered, resulting in 32-bit phis in NIR.
 *
 * First change the variable types to 16 bits, then change all ir_dereference
 * types to 16 bits.
 */
class lower_variables_visitor : public ir_rvalue_enter_visitor {
public:
   lower_variables_visitor(const struct gl_shader_compiler_options *options)
      : options(options) {
      lower_vars = _mesa_pointer_set_create(NULL);
   }

   virtual ~lower_variables_visitor()
   {
      _mesa_set_destroy(lower_vars, NULL);
   }

   virtual ir_visitor_status visit(ir_variable *var);
   virtual ir_visitor_status visit_enter(ir_assignment *ir);
   virtual ir_visitor_status visit_enter(ir_return *ir);
   virtual ir_visitor_status visit_enter(ir_call *ir);
   virtual void handle_rvalue(ir_rvalue **rvalue);

   /* Retypes every node of a (possibly array) dereference chain to 16 bits. */
   void fix_types_in_deref_chain(ir_dereference *ir);
   /* Emits element-wise assignments with precision conversions between a
    * lowered and a non-lowered side.
    */
   void convert_split_assignment(ir_dereference *lhs, ir_rvalue *rhs,
                                 bool insert_before);

   const struct gl_shader_compiler_options *options;
   /* Variables whose type has been changed to 16 bits. */
   set *lower_vars;
};
/* Retypes a constant (recursing into arrays) to 16 bits and re-encodes its
 * data payload to match the new type.
 */
static void
lower_constant(ir_constant *ir)
{
   if (ir->type->is_array()) {
      for (int i = 0; i < ir->type->array_size(); i++)
         lower_constant(ir->get_array_element(i));

      ir->type = lower_glsl_type(ir->type);
      return;
   }

   ir->type = lower_glsl_type(ir->type);

   ir_constant_data value;

   if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
      for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
         value.f16[i] = _mesa_float_to_half(ir->value.f[i]);
   } else if (ir->type->base_type == GLSL_TYPE_INT16) {
      for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
         value.i16[i] = ir->value.i[i];
   } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
      for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
         value.u16[i] = ir->value.u[i];
   } else {
      unreachable("invalid type");
   }

   ir->value = value;
}
ir_visitor_status
lower_variables_visitor::visit(ir_variable *var)
{
   /* Only lower temporaries, autos, and (when enabled) non-UBO float
    * uniforms, and only if the variable is 32-bit, declared mediump/lowp,
    * and of a lowerable base type.
    */
   if ((var->data.mode != ir_var_temporary &&
        var->data.mode != ir_var_auto &&
        /* Lower uniforms but not UBOs. */
        (var->data.mode != ir_var_uniform ||
         var->is_in_buffer_block() ||
         !(options->LowerPrecisionFloat16Uniforms &&
           var->type->without_array()->base_type == GLSL_TYPE_FLOAT))) ||
       !var->type->without_array()->is_32bit() ||
       (var->data.precision != GLSL_PRECISION_MEDIUM &&
        var->data.precision != GLSL_PRECISION_LOW) ||
       !can_lower_type(options, var->type))
      return visit_continue;

   /* Lower constant initializers. */
   if (var->constant_value &&
       var->type == var->constant_value->type) {
      if (!options->LowerPrecisionConstants)
         return visit_continue;
      /* Clone before mutating; the original constant may be referenced
       * elsewhere.
       */
      var->constant_value =
         var->constant_value->clone(ralloc_parent(var), NULL);
      lower_constant(var->constant_value);
   }

   if (var->constant_initializer &&
       var->type == var->constant_initializer->type) {
      if (!options->LowerPrecisionConstants)
         return visit_continue;
      var->constant_initializer =
         var->constant_initializer->clone(ralloc_parent(var), NULL);
      lower_constant(var->constant_initializer);
   }

   var->type = lower_glsl_type(var->type);
   /* Remember the variable so its dereferences can be retyped later. */
   _mesa_set_add(lower_vars, var);

   return visit_continue;
}
/* Retypes a dereference of an already-lowered variable to 16 bits,
 * including every intermediate node of an array dereference chain.
 */
void
lower_variables_visitor::fix_types_in_deref_chain(ir_dereference *ir)
{
   assert(ir->type->without_array()->is_32bit());
   assert(_mesa_set_search(lower_vars, ir->variable_referenced()));

   /* Fix the type in the dereference node. */
   ir->type = lower_glsl_type(ir->type);

   /* If it's an array, fix the types in the whole dereference chain. */
   for (ir_dereference_array *deref_array = ir->as_dereference_array();
        deref_array;
        deref_array = deref_array->array->as_dereference_array()) {
      assert(deref_array->array->type->without_array()->is_32bit());
      deref_array->array->type = lower_glsl_type(deref_array->array->type);
   }
}
/* Emits per-element assignments between a 16-bit side and a 32-bit side,
 * inserting a precision conversion on the RHS of each.  Arrays are recursed
 * into element by element; the generated assignments are inserted before or
 * after base_ir (the statement currently being visited).
 */
void
lower_variables_visitor::convert_split_assignment(ir_dereference *lhs,
                                                  ir_rvalue *rhs,
                                                  bool insert_before)
{
   void *mem_ctx = ralloc_parent(lhs);

   if (lhs->type->is_array()) {
      for (unsigned i = 0; i < lhs->type->length; i++) {
         ir_dereference *l, *r;

         l = new(mem_ctx) ir_dereference_array(lhs->clone(mem_ctx, NULL),
                                               new(mem_ctx) ir_constant(i));
         r = new(mem_ctx) ir_dereference_array(rhs->clone(mem_ctx, NULL),
                                               new(mem_ctx) ir_constant(i));
         convert_split_assignment(l, r, insert_before);
      }
      return;
   }

   assert(lhs->type->is_16bit() || lhs->type->is_32bit());
   assert(rhs->type->is_16bit() || rhs->type->is_32bit());
   assert(lhs->type->is_16bit() != rhs->type->is_16bit());

   /* Convert the RHS up or down so its width matches the LHS. */
   ir_assignment *assign =
      new(mem_ctx) ir_assignment(lhs, convert_precision(lhs->type->is_32bit(), rhs));

   if (insert_before)
      base_ir->insert_before(assign);
   else
      base_ir->insert_after(assign);
}
/* Repair assignments after variables have been retyped to 16 bits:
 * split array copies between lowered and non-lowered variables into
 * per-element converting assignments, and fix scalar/vector assignment
 * types by retyping dereferences and inserting/removing conversions.
 */
ir_visitor_status
lower_variables_visitor::visit_enter(ir_assignment *ir)
{
   ir_dereference *lhs = ir->lhs;
   ir_variable *var = lhs->variable_referenced();
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   /* rhs_var is NULL when the RHS is not a dereference (e.g. an expression
    * or a constant).
    */
   ir_variable *rhs_var = rhs_deref ? rhs_deref->variable_referenced() : NULL;
   ir_constant *rhs_const = ir->rhs->as_constant();

   /* Legalize array assignments between lowered and non-lowered variables.
    * This triggers when the two sides of an array copy disagree in bit size:
    * either a variable RHS whose 16-bit-ness differs from the LHS, or a
    * 32-bit constant RHS copied into a lowered (16-bit) LHS.
    */
   if (lhs->type->is_array() &&
       (rhs_var || rhs_const) &&
       (!rhs_var ||
        (var &&
         var->type->without_array()->is_16bit() !=
         rhs_var->type->without_array()->is_16bit())) &&
       (!rhs_const ||
        (var &&
         var->type->without_array()->is_16bit() &&
         rhs_const->type->without_array()->is_32bit()))) {
      assert(ir->rhs->type->is_array());

      /* Fix array assignments from lowered to non-lowered. */
      if (rhs_var && _mesa_set_search(lower_vars, rhs_var)) {
         fix_types_in_deref_chain(rhs_deref);
         /* Convert to 32 bits for LHS. The original whole-array assignment
          * is replaced by the per-element assignments inserted before it.
          */
         convert_split_assignment(lhs, rhs_deref, true);
         ir->remove();
         return visit_continue;
      }

      /* Fix array assignments from non-lowered to lowered. */
      if (var &&
          _mesa_set_search(lower_vars, var) &&
          ir->rhs->type->without_array()->is_32bit()) {
         fix_types_in_deref_chain(lhs);
         /* Convert to 16 bits for LHS. */
         convert_split_assignment(lhs, ir->rhs, true);
         ir->remove();
         return visit_continue;
      }
   }

   /* Fix assignment types when the LHS variable was lowered. */
   if (var &&
       _mesa_set_search(lower_vars, var)) {
      /* Fix the LHS type. */
      if (lhs->type->without_array()->is_32bit())
         fix_types_in_deref_chain(lhs);

      /* Fix the RHS type if it's a lowered variable. */
      if (rhs_var &&
          _mesa_set_search(lower_vars, rhs_var) &&
          rhs_deref->type->without_array()->is_32bit())
         fix_types_in_deref_chain(rhs_deref);

      /* Fix the RHS type if it's a non-array expression. */
      if (ir->rhs->type->is_32bit()) {
         ir_expression *expr = ir->rhs->as_expression();

         /* Convert the RHS to the LHS type. */
         if (expr &&
             (expr->operation == ir_unop_f162f ||
              expr->operation == ir_unop_i2i ||
              expr->operation == ir_unop_u2u) &&
             expr->operands[0]->type->is_16bit()) {
            /* If there is an "up" conversion, just remove it.
             * This is optional. We could as well execute the else statement and
             * let NIR eliminate the up+down conversions.
             */
            ir->rhs = expr->operands[0];
         } else {
            /* Add a "down" conversion operation to fix the type of RHS. */
            ir->rhs = convert_precision(false, ir->rhs);
         }
      }
   }

   return ir_rvalue_enter_visitor::visit_enter(ir);
}
/* When a lowered (16-bit) variable is returned from a function whose return
 * type is still 32 bits, route the value through a fresh 32-bit temporary so
 * the return value keeps its original type.
 */
ir_visitor_status
lower_variables_visitor::visit_enter(ir_return *ir)
{
   void *mem_ctx = ralloc_parent(ir);

   ir_dereference *deref = ir->value ? ir->value->as_dereference() : NULL;
   if (!deref)
      return ir_rvalue_enter_visitor::visit_enter(ir);

   ir_variable *var = deref->variable_referenced();

   /* Only act when the returned variable was lowered but the dereference is
    * still typed as 32-bit.
    */
   if (var &&
       _mesa_set_search(lower_vars, var) &&
       deref->type->without_array()->is_32bit()) {
      /* Create a 32-bit temporary to hold the widened value. */
      ir_variable *temp =
         new(mem_ctx) ir_variable(deref->type, "lowerp", ir_var_temporary);
      base_ir->insert_before(temp);

      /* Retype the dereference chain to 16 bits. */
      fix_types_in_deref_chain(deref);

      /* Up-convert into the temporary before the return executes. */
      convert_split_assignment(new(mem_ctx) ir_dereference_variable(temp),
                               deref, true);

      ir->value = new(mem_ctx) ir_dereference_variable(temp);
   }

   return ir_rvalue_enter_visitor::visit_enter(ir);
}
/* Fix up rvalue uses of lowered variables: strip now-redundant narrowing
 * conversions wrapped around them, and up-convert remaining 32-bit-typed
 * uses through a temporary.
 */
void lower_variables_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   ir_rvalue *ir = *rvalue;

   if (in_assignee || ir == NULL)
      return;

   ir_expression *expr = ir->as_expression();
   ir_dereference *expr_op0_deref =
      expr ? expr->operands[0]->as_dereference() : NULL;

   /* Is this one of the 32->16 conversion opcodes? */
   const bool narrowing_conv =
      expr &&
      (expr->operation == ir_unop_f2fmp ||
       expr->operation == ir_unop_i2imp ||
       expr->operation == ir_unop_u2ump ||
       expr->operation == ir_unop_f2f16 ||
       expr->operation == ir_unop_i2i ||
       expr->operation == ir_unop_u2u);

   /* Remove f2fmp(float16). Same for int16 and uint16: once the operand's
    * variable is lowered, the conversion becomes a no-op and the (retyped)
    * dereference can be used directly.
    */
   if (narrowing_conv &&
       expr_op0_deref &&
       expr->type->without_array()->is_16bit() &&
       expr_op0_deref->type->without_array()->is_32bit() &&
       expr_op0_deref->variable_referenced() &&
       _mesa_set_search(lower_vars, expr_op0_deref->variable_referenced())) {
      fix_types_in_deref_chain(expr_op0_deref);
      /* Remove f2fmp/i2imp/u2ump. */
      *rvalue = expr_op0_deref;
      return;
   }

   ir_dereference *deref = ir->as_dereference();
   if (!deref)
      return;

   /* var can be NULL if we are dereferencing ir_constant. */
   ir_variable *var = deref->variable_referenced();
   if (!var ||
       !_mesa_set_search(lower_vars, var) ||
       !deref->type->without_array()->is_32bit())
      return;

   void *mem_ctx = ralloc_parent(ir);

   /* Create a 32-bit temporary and replace the use with it. */
   ir_variable *temp =
      new(mem_ctx) ir_variable(deref->type, "lowerp", ir_var_temporary);
   base_ir->insert_before(temp);

   /* Retype the dereference chain to 16 bits. */
   fix_types_in_deref_chain(deref);

   /* Up-convert the lowered value into the temporary for this use. */
   convert_split_assignment(new(mem_ctx) ir_dereference_variable(temp),
                            deref, true);

   *rvalue = new(mem_ctx) ir_dereference_variable(temp);
}
/* Repair calls after variables have been lowered to 16 bits: a lowered
 * variable cannot be passed where the callee still expects a 32-bit
 * parameter or writes a 32-bit return value, so route such arguments and
 * the return value through 32-bit temporaries with explicit conversions.
 */
ir_visitor_status
lower_variables_visitor::visit_enter(ir_call *ir)
{
   void *mem_ctx = ralloc_parent(ir);

   /* We can't pass 16-bit variables as 32-bit inout/out parameters. */
   foreach_two_lists(formal_node, &ir->callee->parameters,
                     actual_node, &ir->actual_parameters) {
      ir_dereference *param_deref =
         ((ir_rvalue *)actual_node)->as_dereference();
      ir_variable *param = (ir_variable *)formal_node;

      /* Non-dereference actuals (expressions, constants) need no fixup. */
      if (!param_deref)
         continue;

      ir_variable *var = param_deref->variable_referenced();

      /* var can be NULL if we are dereferencing ir_constant. */
      if (var &&
          _mesa_set_search(lower_vars, var) &&
          param->type->without_array()->is_32bit()) {
         /* Retype the actual argument's dereference chain to 16 bits. */
         fix_types_in_deref_chain(param_deref);

         /* Create a 32-bit temporary variable for the parameter. */
         ir_variable *new_var =
            new(mem_ctx) ir_variable(param->type, "lowerp", ir_var_temporary);
         base_ir->insert_before(new_var);

         /* Replace the parameter with the temporary in the argument list. */
         actual_node->replace_with(new(mem_ctx) ir_dereference_variable(new_var));

         if (param->data.mode == ir_var_function_in ||
             param->data.mode == ir_var_function_inout) {
            /* Convert to 32 bits for passing in (assignment inserted before
             * the call).
             */
            convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var),
                                     param_deref->clone(mem_ctx, NULL), true);
         }

         if (param->data.mode == ir_var_function_out ||
             param->data.mode == ir_var_function_inout) {
            /* Convert to 16 bits after returning (assignment inserted after
             * the call).
             */
            convert_split_assignment(param_deref,
                                     new(mem_ctx) ir_dereference_variable(new_var),
                                     false);
         }
      }
   }

   /* Fix the type of return value dereferencies. */
   ir_dereference_variable *ret_deref = ir->return_deref;
   ir_variable *ret_var = ret_deref ? ret_deref->variable_referenced() : NULL;

   if (ret_var &&
       _mesa_set_search(lower_vars, ret_var) &&
       ret_deref->type->without_array()->is_32bit()) {
      /* Create a 32-bit temporary variable matching the callee's return
       * type to receive the call result.
       */
      ir_variable *new_var =
         new(mem_ctx) ir_variable(ir->callee->return_type, "lowerp",
                                  ir_var_temporary);
      base_ir->insert_before(new_var);

      /* Replace the return variable. */
      ret_deref->var = new_var;

      /* Convert to 16 bits after returning. */
      convert_split_assignment(new(mem_ctx) ir_dereference_variable(ret_var),
                               new(mem_ctx) ir_dereference_variable(new_var),
                               false);
   }

   return ir_rvalue_enter_visitor::visit_enter(ir);
}
}
void
lower_precision(const struct gl_shader_compiler_options *options,
exec_list *instructions)
{
find_precision_visitor v(options);
find_lowerable_rvalues(options, instructions, v.lowerable_rvalues);
visit_list_elements(&v, instructions);
lower_variables_visitor vars(options);
visit_list_elements(&vars, instructions);
}