r300: add switch to support IEEE and FF math opcodes
Also add support for the 0*NaN = NaN IEEE compliant multiply on R500. All of this is disabled by default, but can be enabled with a RADEON_DEBUG variable or alternatively with a driconf tweak. Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com> Reviewed-by: Filip Gawin <filip@gawin.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31745>
This commit is contained in:

committed by
Marge Bot

parent
26fc1ea9e5
commit
584ac64670
@@ -1869,6 +1869,13 @@ r300 driver environment variables
|
||||
Disable AA compression and fast AA clear
|
||||
``notcl``
|
||||
Disable hardware accelerated Transform/Clip/Lighting
|
||||
``ieeemath``
|
||||
Force IEEE versions of VS math opcodes where applicable
|
||||
and also IEEE handling of multiply by zero (R5xx only)
|
||||
``ffmath``
|
||||
Force FF versions of VS math opcodes where applicable
|
||||
and 0 * anything = 0 rules in FS
|
||||
|
||||
|
||||
Asahi driver environment variables
|
||||
----------------------------------
|
||||
|
@@ -197,6 +197,26 @@ ei_math1(struct r300_vertex_program_code *vp, unsigned int hw_opcode,
|
||||
inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
|
||||
}
|
||||
|
||||
/*
 * Emit a math instruction through ei_math1(), choosing the hardware opcode
 * according to math_mode.
 *
 * math_mode selects one of the three candidate opcodes:
 *   RC_MATH_IEEE -> hw_opcode_ieee
 *   RC_MATH_DX   -> hw_opcode_dx
 *   RC_MATH_FF   -> hw_opcode_ff
 * Any other value falls into the default case and hits unreachable().
 *
 * vp, vpi and inst are forwarded unchanged to ei_math1() together with the
 * selected opcode.
 */
static void
|
||||
ei_math1_select(struct r300_vertex_program_code *vp,
|
||||
unsigned math_mode,
|
||||
unsigned hw_opcode_ieee,
|
||||
unsigned hw_opcode_dx,
|
||||
unsigned hw_opcode_ff,
|
||||
struct rc_sub_instruction *vpi,
|
||||
unsigned int *inst)
|
||||
{
|
||||
unsigned hw_opcode;
|
||||
switch (math_mode) {
|
||||
case RC_MATH_IEEE: hw_opcode = hw_opcode_ieee; break;
|
||||
case RC_MATH_DX: hw_opcode = hw_opcode_dx; break;
|
||||
case RC_MATH_FF: hw_opcode = hw_opcode_ff; break;
|
||||
default:
|
||||
unreachable();
|
||||
}
|
||||
ei_math1(vp, hw_opcode, vpi, inst);
|
||||
}
|
||||
|
||||
static void
|
||||
ei_cmp(struct r300_vertex_program_code *vp, struct rc_sub_instruction *vpi, unsigned int *inst)
|
||||
{
|
||||
@@ -407,7 +427,8 @@ translate_vertex_program(struct radeon_compiler *c, void *user)
|
||||
ei_vector1(compiler->code, VE_FRACTION, vpi, inst);
|
||||
break;
|
||||
case RC_OPCODE_LG2:
|
||||
ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst);
|
||||
ei_math1_select(compiler->code, compiler->Base.math_rules, ME_LOG_BASE2_IEEE,
|
||||
ME_LOG_BASE2_FULL_DX, ME_LOG_BASE2_FULL_DX, vpi, inst);
|
||||
break;
|
||||
case RC_OPCODE_LIT:
|
||||
ei_lit(compiler->code, vpi, inst);
|
||||
@@ -434,10 +455,12 @@ translate_vertex_program(struct radeon_compiler *c, void *user)
|
||||
ei_pow(compiler->code, vpi, inst);
|
||||
break;
|
||||
case RC_OPCODE_RCP:
|
||||
ei_math1(compiler->code, ME_RECIP_DX, vpi, inst);
|
||||
ei_math1_select(compiler->code, compiler->Base.math_rules, ME_RECIP_IEEE,
|
||||
ME_RECIP_DX, ME_RECIP_FF, vpi, inst);
|
||||
break;
|
||||
case RC_OPCODE_RSQ:
|
||||
ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst);
|
||||
ei_math1_select(compiler->code, compiler->Base.math_rules, ME_RECIP_SQRT_IEEE,
|
||||
ME_RECIP_SQRT_DX, ME_RECIP_SQRT_FF, vpi, inst);
|
||||
break;
|
||||
case RC_OPCODE_SEQ:
|
||||
ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst);
|
||||
|
@@ -14,6 +14,10 @@
|
||||
|
||||
#define RC_DBG_LOG (1 << 0)
|
||||
|
||||
#define RC_MATH_DX 0x00
|
||||
#define RC_MATH_IEEE 0x01
|
||||
#define RC_MATH_FF 0x02
|
||||
|
||||
struct rc_swizzle_caps;
|
||||
|
||||
enum rc_program_type { RC_VERTEX_PROGRAM, RC_FRAGMENT_PROGRAM, RC_NUM_PROGRAM_TYPES };
|
||||
@@ -45,6 +49,9 @@ struct radeon_compiler {
|
||||
/* Whether to remove unused constants and empty holes in constant space. */
|
||||
unsigned remove_unused_constants : 1;
|
||||
|
||||
/* Math compatibility mode, for some PVS opcodes and for multiply by zero rules on R5xx */
|
||||
unsigned math_rules : 2;
|
||||
|
||||
/**
|
||||
* Variables used internally, not to be touched by callers
|
||||
* of the compiler
|
||||
|
@@ -34,6 +34,8 @@ static const struct debug_named_value r300_debug_options[] = {
|
||||
{ "nohiz", DBG_NO_HIZ, "Disable hierarchical zbuffer" },
|
||||
{ "nocmask", DBG_NO_CMASK, "Disable AA compression and fast AA clear" },
|
||||
{ "notcl", DBG_NO_TCL, "Disable hardware accelerated Transform/Clip/Lighting" },
|
||||
{ "ieeemath", DBG_IEEEMATH, "Force IEEE versions of VS math opcodes where applicable and also IEEE handling of multiply by zero (R5xx only)" },
|
||||
{ "ffmath", DBG_FFMATH, "Force FF versions of VS math opcodes where applicable and 0*anything=0 rules in FS" },
|
||||
|
||||
/* must be last */
|
||||
DEBUG_NAMED_VALUE_END
|
||||
|
@@ -1,4 +1,6 @@
|
||||
OPT_BOOL(nohiz, false, "Disable hierarchical zbuffer")
|
||||
OPT_BOOL(nozmask, false, "Disable zbuffer compression")
|
||||
OPT_BOOL(ieeemath, false, "Force IEEE math rules and opcodes where applicable")
|
||||
OPT_BOOL(ffmath, false, "Force FF math rules and opcodes where applicable")
|
||||
|
||||
#undef OPT_BOOL
|
||||
|
@@ -255,7 +255,10 @@ static void r300_emit_fs_code_to_buffer(
|
||||
code->int_constant_count * 2;
|
||||
|
||||
NEW_CB(shader->cb_code, shader->cb_code_size);
|
||||
OUT_CB_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
|
||||
if (r300->screen->options.ieeemath)
|
||||
OUT_CB_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO_DEFAULT);
|
||||
else
|
||||
OUT_CB_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO_LEGACY);
|
||||
OUT_CB_REG(R500_US_PIXSIZE, code->max_temp_idx);
|
||||
OUT_CB_REG(R500_US_FC_CTRL, code->us_fc_ctrl);
|
||||
for(i = 0; i < code->int_constant_count; i++){
|
||||
|
@@ -3279,7 +3279,8 @@ enum {
|
||||
# define R500_US_CODE_RANGE_ADDR(x) ((x) << 0)
|
||||
# define R500_US_CODE_RANGE_SIZE(x) ((x) << 16)
|
||||
#define R500_US_CONFIG 0x4600
|
||||
# define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO (1 << 1)
|
||||
# define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO_DEFAULT (0 << 1)
|
||||
# define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO_LEGACY (1 << 1)
|
||||
#define R500_US_FC_ADDR_0 0xa000
|
||||
# define R500_FC_BOOL_ADDR(x) ((x) << 0)
|
||||
# define R500_FC_INT_ADDR(x) ((x) << 8)
|
||||
|
@@ -840,6 +840,11 @@ struct pipe_screen* r300_screen_create(struct radeon_winsys *rws,
|
||||
if (SCREEN_DBG_ON(r300screen, DBG_NO_TCL))
|
||||
r300screen->caps.has_tcl = false;
|
||||
|
||||
if (SCREEN_DBG_ON(r300screen, DBG_IEEEMATH))
|
||||
r300screen->options.ieeemath = true;
|
||||
if (SCREEN_DBG_ON(r300screen, DBG_FFMATH))
|
||||
r300screen->options.ffmath = true;
|
||||
|
||||
r300screen->rws = rws;
|
||||
r300screen->screen.destroy = r300_destroy_screen;
|
||||
r300screen->screen.get_name = r300_get_name;
|
||||
|
@@ -92,6 +92,8 @@ radeon_winsys(struct pipe_screen *screen) {
|
||||
#define DBG_NO_HIZ (1 << 22)
|
||||
#define DBG_NO_CMASK (1 << 23)
|
||||
#define DBG_NO_TCL (1 << 25)
|
||||
#define DBG_IEEEMATH (1 << 26)
|
||||
#define DBG_FFMATH (1 << 27)
|
||||
/*@}*/
|
||||
static inline bool SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags)
|
||||
{
|
||||
|
@@ -198,6 +198,12 @@ void r300_translate_vertex_shader(struct r300_context *r300,
|
||||
compiler.Base.debug = &r300->context.debug;
|
||||
compiler.Base.is_r500 = r300->screen->caps.is_r500;
|
||||
compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
|
||||
/* Only R500 has a few IEEE math opcodes. */
|
||||
if (r300->screen->options.ieeemath && r300->screen->caps.is_r500) {
|
||||
compiler.Base.math_rules = RC_MATH_IEEE;
|
||||
} else if (r300->screen->options.ffmath) {
|
||||
compiler.Base.math_rules = RC_MATH_FF;
|
||||
}
|
||||
compiler.Base.has_half_swizzles = false;
|
||||
compiler.Base.has_presub = false;
|
||||
compiler.Base.has_omod = false;
|
||||
|
Reference in New Issue
Block a user