@@ -0,0 +1,618 @@
/*
* Copyright © 2019 Google, Inc
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* \file lower_precision.cpp
*/
# include "main/macros.h"
# include "compiler/glsl_types.h"
# include "ir.h"
# include "ir_builder.h"
# include "ir_optimization.h"
# include "ir_rvalue_visitor.h"
# include "util/half_float.h"
# include "util/set.h"
# include <vector>
namespace {
/**
 * Driver visitor of the pass.
 *
 * For every rvalue it visits, handle_rvalue() checks whether the rvalue is
 * in lowerable_rvalues and, if so, rewrites it using
 * lower_precision_visitor.
 */
class find_precision_visitor : public ir_rvalue_enter_visitor {
public:
   find_precision_visitor();
   ~find_precision_visitor();

   /* The constructor allocates lowerable_rvalues and the destructor destroys
    * it, so a copy of this object would destroy the same set twice. Forbid
    * copying instead of relying on nobody doing it.
    */
   find_precision_visitor(const find_precision_visitor &) = delete;
   find_precision_visitor &operator=(const find_precision_visitor &) = delete;

   virtual void handle_rvalue(ir_rvalue **rvalue);

   /* Set to true by handle_rvalue() once an rvalue has been lowered. */
   bool progress;

   /* Set of rvalues that can be lowered. This will be filled in by
    * find_lowerable_rvalues_visitor. Only the root node of a lowerable section
    * will be added to this set.
    */
   struct set *lowerable_rvalues;
};
class find_lowerable_rvalues_visitor : public ir_hierarchical_visitor {
public :
enum can_lower_state {
UNKNOWN ,
CANT_LOWER ,
SHOULD_LOWER ,
} ;
enum parent_relation {
/* The parent performs a further operation involving the result from the
* child and can be lowered along with it.
*/
COMBINED_OPERATION ,
/* The parent instruction’ s operation is independent of the child type so
* the child should be lowered separately.
*/
INDEPENDENT_OPERATION ,
} ;
struct stack_entry {
ir_instruction * instr ;
enum can_lower_state state ;
/* List of child rvalues that can be lowered. When this stack entry is
* popped, if this node itself can’ t be lowered than all of the children
* are root nodes to lower so we will add them to lowerable_rvalues.
* Otherwise if this node can also be lowered then we won’ t add the
* children because we only want to add the topmost lowerable nodes to
* lowerable_rvalues and the children will be lowered as part of lowering
* this node.
*/
std : : vector < ir_instruction * > lowerable_children ;
} ;
find_lowerable_rvalues_visitor ( struct set * result ) ;
static void stack_enter ( class ir_instruction * ir , void * data ) ;
static void stack_leave ( class ir_instruction * ir , void * data ) ;
virtual ir_visitor_status visit ( ir_constant * ir ) ;
virtual ir_visitor_status visit ( ir_dereference_variable * ir ) ;
virtual ir_visitor_status visit_enter ( ir_dereference_record * ir ) ;
virtual ir_visitor_status visit_enter ( ir_dereference_array * ir ) ;
virtual ir_visitor_status visit_enter ( ir_texture * ir ) ;
virtual ir_visitor_status visit_enter ( ir_expression * ir ) ;
virtual ir_visitor_status visit_leave ( ir_assignment * ir ) ;
virtual ir_visitor_status visit_leave ( ir_call * ir ) ;
static can_lower_state handle_precision ( const glsl_type * type ,
int precision ) ;
static parent_relation get_parent_relation ( ir_instruction * parent ,
ir_instruction * child ) ;
std : : vector < stack_entry > stack ;
struct set * lowerable_rvalues ;
void pop_stack_entry ( ) ;
void add_lowerable_children ( const stack_entry & entry ) ;
} ;
/**
 * Rewriting visitor that is run over a single lowerable rvalue tree by
 * find_precision_visitor::handle_rvalue().
 */
class lower_precision_visitor : public ir_rvalue_visitor {
public:
   /* Converts one rvalue: dereferences get wrapped in a conversion, other
    * float-typed rvalues have their type switched to float16.
    */
   virtual void handle_rvalue(ir_rvalue **rvalue);

   /* These return visit_continue_with_parent without touching the node. */
   virtual ir_visitor_status visit_enter(ir_dereference_array *ir);
   virtual ir_visitor_status visit_enter(ir_dereference_record *ir);
   virtual ir_visitor_status visit_enter(ir_call *ir);
   virtual ir_visitor_status visit_enter(ir_texture *ir);

   /* Patches bool<->float conversion opcodes after the operands were
    * handled.
    */
   virtual ir_visitor_status visit_leave(ir_expression *ir);
};
bool
can_lower_type ( const glsl_type * type )
{
/* Don’ t lower any expressions involving non-float types except bool and
* texture samplers. This will rule out operations that change the type such
* as conversion to ints. Instead it will end up lowering the arguments
* instead and adding a final conversion to float32. We want to handle
* boolean types so that it will do comparisons as 16-bit.
*/
switch ( type - > base_type ) {
case GLSL_TYPE_FLOAT :
case GLSL_TYPE_BOOL :
case GLSL_TYPE_SAMPLER :
return true ;
default :
return false ;
}
}
/**
 * Stores the result set and registers stack_enter()/stack_leave() as the
 * enter/leave callbacks, with this object as their user data.
 */
find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set *res)
   : lowerable_rvalues(res)
{
   /* Callback function and its data pointer, paired up per direction. */
   callback_enter = stack_enter;
   data_enter = this;

   callback_leave = stack_leave;
   data_leave = this;
}
void
find_lowerable_rvalues_visitor : : stack_enter ( class ir_instruction * ir ,
void * data )
{
find_lowerable_rvalues_visitor * state =
( find_lowerable_rvalues_visitor * ) data ;
/* Add a new stack entry for this instruction */
stack_entry entry ;
entry . instr = ir ;
entry . state = state - > in_assignee ? CANT_LOWER : UNKNOWN ;
state - > stack . push_back ( entry ) ;
}
void
find_lowerable_rvalues_visitor : : add_lowerable_children ( const stack_entry & entry )
{
/* We can’ t lower this node so if there were any pending children then they
* are all root lowerable nodes and we should add them to the set.
*/
for ( auto & it : entry . lowerable_children )
_mesa_set_add ( lowerable_rvalues , it ) ;
}
void
find_lowerable_rvalues_visitor : : pop_stack_entry ( )
{
const stack_entry & entry = stack . end ( ) [ - 1 ] ;
if ( stack . size ( ) > = 2 ) {
/* Combine this state into the parent state, unless the parent operation
* doesn’ t have any relation to the child operations
*/
stack_entry & parent = stack . end ( ) [ - 2 ] ;
parent_relation rel = get_parent_relation ( parent . instr , entry . instr ) ;
if ( rel = = COMBINED_OPERATION ) {
switch ( entry . state ) {
case CANT_LOWER :
parent . state = CANT_LOWER ;
break ;
case SHOULD_LOWER :
if ( parent . state = = UNKNOWN )
parent . state = SHOULD_LOWER ;
break ;
case UNKNOWN :
break ;
}
}
}
if ( entry . state = = SHOULD_LOWER ) {
ir_rvalue * rv = entry . instr - > as_rvalue ( ) ;
if ( rv = = NULL ) {
add_lowerable_children ( entry ) ;
} else if ( stack . size ( ) > = 2 ) {
stack_entry & parent = stack . end ( ) [ - 2 ] ;
switch ( get_parent_relation ( parent . instr , rv ) ) {
case COMBINED_OPERATION :
/* We only want to add the toplevel lowerable instructions to the
* lowerable set. Therefore if there is a parent then instead of
* adding this instruction to the set we will queue depending on
* the result of the parent instruction.
*/
parent . lowerable_children . push_back ( entry . instr ) ;
break ;
case INDEPENDENT_OPERATION :
_mesa_set_add ( lowerable_rvalues , rv ) ;
break ;
}
} else {
/* This is a toplevel node so add it directly to the lowerable
* set.
*/
_mesa_set_add ( lowerable_rvalues , rv ) ;
}
} else if ( entry . state = = CANT_LOWER ) {
add_lowerable_children ( entry ) ;
}
stack . pop_back ( ) ;
}
void
find_lowerable_rvalues_visitor : : stack_leave ( class ir_instruction * ir ,
void * data )
{
find_lowerable_rvalues_visitor * state =
( find_lowerable_rvalues_visitor * ) data ;
state - > pop_stack_entry ( ) ;
}
enum find_lowerable_rvalues_visitor : : can_lower_state
find_lowerable_rvalues_visitor : : handle_precision ( const glsl_type * type ,
int precision )
{
if ( ! can_lower_type ( type ) )
return CANT_LOWER ;
switch ( precision ) {
case GLSL_PRECISION_NONE :
return UNKNOWN ;
case GLSL_PRECISION_HIGH :
return CANT_LOWER ;
case GLSL_PRECISION_MEDIUM :
case GLSL_PRECISION_LOW :
return SHOULD_LOWER ;
}
return CANT_LOWER ;
}
enum find_lowerable_rvalues_visitor : : parent_relation
find_lowerable_rvalues_visitor : : get_parent_relation ( ir_instruction * parent ,
ir_instruction * child )
{
/* If the parent is a dereference instruction then the only child could be
* for example an array dereference and that should be lowered independently
* of the parent.
*/
if ( parent - > as_dereference ( ) )
return INDEPENDENT_OPERATION ;
/* The precision of texture sampling depend on the precision of the sampler.
* The rest of the arguments don’ t matter so we can treat it as an
* independent operation.
*/
if ( parent - > as_texture ( ) )
return INDEPENDENT_OPERATION ;
return COMBINED_OPERATION ;
}
/**
 * Constant leaf: pushes and pops its own stack entry, marking it CANT_LOWER
 * when the constant's type can't be lowered. Otherwise the state set by
 * stack_enter() is left alone.
 */
ir_visitor_status
find_lowerable_rvalues_visitor::visit(ir_constant *ir)
{
   stack_enter(ir, this);

   const bool lowerable_type = can_lower_type(ir->type);
   if (!lowerable_type)
      stack.back().state = CANT_LOWER;

   stack_leave(ir, this);

   return visit_continue;
}
/**
 * Variable dereference leaf: pushes and pops its own stack entry. If the
 * entry is still UNKNOWN after stack_enter(), the verdict is derived from
 * the dereference's type and precision.
 */
ir_visitor_status
find_lowerable_rvalues_visitor::visit(ir_dereference_variable *ir)
{
   stack_enter(ir, this);

   stack_entry &top = stack.back();
   if (top.state == UNKNOWN)
      top.state = handle_precision(ir->type, ir->precision());

   stack_leave(ir, this);

   return visit_continue;
}
/**
 * Record dereference: after the base-class hook has run, derives the
 * verdict of the top stack entry from the dereference's type and precision
 * unless a verdict was already set.
 */
ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_dereference_record *ir)
{
   /* NOTE(review): presumably this is what invokes stack_enter() through
    * callback_enter -- confirm in ir_hierarchical_visitor.
    */
   ir_hierarchical_visitor::visit_enter(ir);

   stack_entry &top = stack.back();
   if (top.state == UNKNOWN)
      top.state = handle_precision(ir->type, ir->precision());

   return visit_continue;
}
/**
 * Array dereference: after the base-class hook has run, derives the verdict
 * of the top stack entry from the dereference's type and precision unless a
 * verdict was already set.
 */
ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_dereference_array *ir)
{
   /* NOTE(review): presumably this is what invokes stack_enter() through
    * callback_enter -- confirm in ir_hierarchical_visitor.
    */
   ir_hierarchical_visitor::visit_enter(ir);

   stack_entry &top = stack.back();
   if (top.state == UNKNOWN)
      top.state = handle_precision(ir->type, ir->precision());

   return visit_continue;
}
/**
 * Texture operation: unless a verdict was already set, the top stack entry
 * gets one derived from the result type and the precision of the sampler.
 */
ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_texture *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   stack_entry &top = stack.back();
   if (top.state != UNKNOWN)
      return visit_continue;

   /* The sampler's precision, not that of the other arguments, determines
    * the precision of the sampled value.
    */
   top.state = handle_precision(ir->type, ir->sampler->precision());

   return visit_continue;
}
/**
 * Expression: marks the top stack entry CANT_LOWER when the result type
 * can't be lowered or when the operation is one of the dFdx/dFdy variants.
 * Any other expression keeps the state it already had.
 */
ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_expression *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   bool blocked = !can_lower_type(ir->type);

   /* Don't lower precision for derivative calculations */
   switch (ir->operation) {
   case ir_unop_dFdx:
   case ir_unop_dFdx_coarse:
   case ir_unop_dFdx_fine:
   case ir_unop_dFdy:
   case ir_unop_dFdy_coarse:
   case ir_unop_dFdy_fine:
      blocked = true;
      break;
   default:
      break;
   }

   if (blocked)
      stack.back().state = CANT_LOWER;

   return visit_continue;
}
/**
 * Function call: assigns a precision to the compiler-generated temporary
 * that receives the return value.
 *
 * If the callee's return precision together with the temporary's type gives
 * SHOULD_LOWER, the temporary becomes medium precision; in every other case
 * it becomes high precision. Calls without a return dereference are left
 * untouched.
 */
ir_visitor_status
find_lowerable_rvalues_visitor::visit_leave(ir_call *ir)
{
   ir_hierarchical_visitor::visit_leave(ir);

   /* Nothing to tag when the return type is void. */
   if (ir->return_deref == NULL)
      return visit_continue;

   ir_variable *ret_var = ir->return_deref->variable_referenced();

   assert(ret_var->data.mode == ir_var_temporary);

   const bool lowerable =
      handle_precision(ret_var->type,
                       ir->callee->return_precision) == SHOULD_LOWER;

   if (!lowerable) {
      ret_var->data.precision = GLSL_PRECISION_HIGH;
      return visit_continue;
   }

   /* There probably shouldn't be any situation where several ir_call
    * instructions write to the same temporary.
    */
   assert(ret_var->data.precision == GLSL_PRECISION_NONE);
   ret_var->data.precision = GLSL_PRECISION_MEDIUM;

   return visit_continue;
}
/**
 * Assignment: assigns a precision to compiler-generated temporaries based
 * on what is stored into them. Variables of any other mode are left alone.
 */
ir_visitor_status
find_lowerable_rvalues_visitor::visit_leave(ir_assignment *ir)
{
   ir_hierarchical_visitor::visit_leave(ir);

   ir_variable *target = ir->lhs->variable_referenced();

   if (target->data.mode != ir_var_temporary)
      return visit_continue;

   if (_mesa_set_search(lowerable_rvalues, ir->rhs) != NULL) {
      /* The right-hand side is lowerable, so the temporary can be assumed
       * to be medium precision -- but only if no precision was chosen yet.
       * Temporaries such as those generated for the ?: operator can be
       * assigned several times with different precisions, and this keeps
       * the highest one.
       */
      if (target->data.precision == GLSL_PRECISION_NONE)
         target->data.precision = GLSL_PRECISION_MEDIUM;
   } else if (ir->rhs->as_constant() == NULL) {
      /* A non-lowerable, non-constant value forces high precision. */
      target->data.precision = GLSL_PRECISION_HIGH;
   }

   return visit_continue;
}
/**
 * Runs find_lowerable_rvalues_visitor over \p instructions, collecting the
 * lowerable root rvalues in \p result.
 */
void
find_lowerable_rvalues(exec_list *instructions,
                       struct set *result)
{
   find_lowerable_rvalues_visitor finder(result);

   visit_list_elements(&finder, instructions);

   /* Every entry pushed during the walk must have been popped again. */
   assert(finder.stack.empty());
}
/**
 * Wraps \p ir in a new unary expression \p op.
 *
 * The result type has the same vector and matrix dimensions as \p ir; its
 * base type is float16 when \p op is ir_unop_f2fmp and float for any other
 * op. The expression is allocated in the ralloc parent context of \p ir.
 */
static ir_rvalue *
convert_precision(int op, ir_rvalue *ir)
{
   unsigned target_base = GLSL_TYPE_FLOAT;
   if (op == ir_unop_f2fmp)
      target_base = GLSL_TYPE_FLOAT16;

   const glsl_type *const converted_type =
      glsl_type::get_instance(target_base,
                              ir->type->vector_elements,
                              ir->type->matrix_columns);

   void *const mem_ctx = ralloc_parent(ir);

   return new(mem_ctx) ir_expression(op, converted_type, ir, NULL);
}
void
lower_precision_visitor : : handle_rvalue ( ir_rvalue * * rvalue )
{
ir_rvalue * ir = * rvalue ;
if ( ir = = NULL )
return ;
if ( ir - > as_dereference ( ) ) {
if ( ! ir - > type - > is_boolean ( ) )
* rvalue = convert_precision ( ir_unop_f2fmp , ir ) ;
} else if ( ir - > type - > is_float ( ) ) {
ir - > type = glsl_type : : get_instance ( GLSL_TYPE_FLOAT16 ,
ir - > type - > vector_elements ,
ir - > type - > matrix_columns ,
ir - > type - > explicit_stride ,
ir - > type - > interface_row_major ) ;
ir_constant * const_ir = ir - > as_constant ( ) ;
if ( const_ir ) {
ir_constant_data value ;
for ( unsigned i = 0 ; i < ARRAY_SIZE ( value . f16 ) ; i + + )
value . f16 [ i ] = _mesa_float_to_half ( const_ir - > value . f [ i ] ) ;
const_ir - > value = value ;
}
}
}
/**
 * Record dereference: hand control back to the parent right away because
 * the variable itself must not be lowered.
 */
ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_record *)
{
   return visit_continue_with_parent;
}
/**
 * Array dereference: hand control back to the parent right away. Neither
 * the array index nor the variable should be converted here; an index that
 * is itself lowerable is handled separately.
 */
ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_array *)
{
   return visit_continue_with_parent;
}
/**
 * Function call: hand control back to the parent right away. The arguments
 * are not converted here; they are handled separately.
 */
ir_visitor_status
lower_precision_visitor::visit_enter(ir_call *)
{
   return visit_continue_with_parent;
}
/**
 * Texture operation: hand control back to the parent right away. The
 * arguments are not converted here; they are handled separately.
 */
ir_visitor_status
lower_precision_visitor::visit_enter(ir_texture *)
{
   return visit_continue_with_parent;
}
/**
 * Expression: after the base class has processed the expression, replaces
 * a bool<->float conversion opcode by its 16-bit counterpart. All other
 * opcodes are kept.
 */
ir_visitor_status
lower_precision_visitor::visit_leave(ir_expression *ir)
{
   ir_rvalue_visitor::visit_leave(ir);

   if (ir->operation == ir_unop_b2f)
      ir->operation = ir_unop_b2f16;
   else if (ir->operation == ir_unop_f2b)
      ir->operation = ir_unop_f162b;

   return visit_continue;
}
void
find_precision_visitor : : handle_rvalue ( ir_rvalue * * rvalue )
{
/* Checking the precision of rvalue can be lowered first throughout
* find_lowerable_rvalues_visitor.
* Once it found the precision of rvalue can be lowered, then we can
* add conversion f2fmp through lower_precision_visitor.
*/
if ( * rvalue = = NULL )
return ;
struct set_entry * entry = _mesa_set_search ( lowerable_rvalues , * rvalue ) ;
if ( ! entry )
return ;
_mesa_set_remove ( lowerable_rvalues , entry ) ;
/* If the entire expression is just a variable dereference then trying to
* lower it will just directly add pointless to and from conversions without
* any actual operation in-between. Although these will eventually get
* optimised out, avoiding generating them here also avoids breaking inout
* parameters to functions.
*/
if ( ( * rvalue ) - > as_dereference ( ) )
return ;
lower_precision_visitor v ;
( * rvalue ) - > accept ( & v ) ;
v . handle_rvalue ( rvalue ) ;
/* We don’ t need to add the final conversion if the final type has been
* converted to bool
*/
if ( ( * rvalue ) - > type - > base_type ! = GLSL_TYPE_BOOL )
* rvalue = convert_precision ( ir_unop_f162f , * rvalue ) ;
progress = true ;
}
/**
 * Starts with no progress recorded and a freshly created pointer set for
 * the lowerable rvalues.
 */
find_precision_visitor::find_precision_visitor()
   : progress(false),
     lowerable_rvalues(_mesa_pointer_set_create(NULL))
{
   /* Nothing else to set up. */
}
/**
 * Destroys the set created by the constructor. No per-entry delete callback
 * is passed.
 */
find_precision_visitor::~find_precision_visitor()
{
   _mesa_set_destroy(lowerable_rvalues, NULL);
}
}
/**
 * Entry point of the pass.
 *
 * First collects the lowerable root rvalues of \p instructions, then walks
 * the list again to rewrite them.
 *
 * \return true if at least one rvalue was lowered.
 */
bool
lower_precision(exec_list *instructions)
{
   find_precision_visitor pass;

   /* Phase 1: analysis, fills pass.lowerable_rvalues. */
   find_lowerable_rvalues(instructions, pass.lowerable_rvalues);

   /* Phase 2: rewrite every rvalue found in the set. */
   visit_list_elements(&pass, instructions);

   return pass.progress;
}