diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp
index 4e2263055da..2bbc463297b 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -2260,7 +2260,8 @@ opt_shader_and_create_symbol_table(const struct gl_constants *consts,
                           consts->GLSLHasHalfFloatPacking);
    do_mat_op_to_vec(shader->ir);
 
-   lower_instructions(shader->ir, exts->ARB_gpu_shader5);
+   lower_instructions(shader->ir, consts->ForceGLSLAbsSqrt,
+                      exts->ARB_gpu_shader5);
 
    do_vec_index_to_cond_assign(shader->ir);
 
diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp
index 4d86926038f..778423f08d5 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -149,8 +149,6 @@ private:
 
    void adjust_sparse_variable(nir_deref_instr *var_deref, const glsl_type *type,
                                nir_def *dest);
-
-   const struct gl_constants *consts;
 };
 
 /*
@@ -204,7 +202,6 @@ glsl_to_nir(const struct gl_constants *consts,
 nir_visitor::nir_visitor(const struct gl_constants *consts, nir_shader *shader,
                          const uint8_t *src_blake3)
 {
-   this->consts = consts;
    this->shader = shader;
    this->is_global = true;
    this->var_table = _mesa_pointer_hash_table_create(NULL);
@@ -2076,19 +2073,8 @@ nir_visitor::visit(ir_expression *ir)
                : nir_isign(&b, srcs[0]);
       break;
    case ir_unop_rcp: result = nir_frcp(&b, srcs[0]); break;
-
-   case ir_unop_rsq:
-      if (consts->ForceGLSLAbsSqrt)
-         srcs[0] = nir_fabs(&b, srcs[0]);
-      result = nir_frsq(&b, srcs[0]);
-      break;
-
-   case ir_unop_sqrt:
-      if (consts->ForceGLSLAbsSqrt)
-         srcs[0] = nir_fabs(&b, srcs[0]);
-      result = nir_fsqrt(&b, srcs[0]);
-      break;
-
+   case ir_unop_rsq: result = nir_frsq(&b, srcs[0]); break;
+   case ir_unop_sqrt: result = nir_fsqrt(&b, srcs[0]); break;
    case ir_unop_exp: result = nir_fexp2(&b, nir_fmul_imm(&b, srcs[0], M_LOG2E)); break;
    case ir_unop_log: result = nir_fmul_imm(&b, nir_flog2(&b, srcs[0]), 1.0 / M_LOG2E); break;
    case ir_unop_exp2: result = nir_fexp2(&b, srcs[0]); break;
diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
index e4beeef95ac..d65c44a703b 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -53,7 +53,7 @@ bool do_minmax_prune(exec_list *instructions);
 bool do_tree_grafting(exec_list *instructions);
 bool do_vec_index_to_cond_assign(exec_list *instructions);
 bool lower_instructions(exec_list *instructions,
-                        bool have_gpu_shader5);
+                        bool force_abs_sqrt, bool have_gpu_shader5);
 bool lower_packing_builtins(exec_list *instructions,
                             bool has_shading_language_packing,
                             bool has_gpu_shader5,
diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp
index 78f3e8f3bc3..d5f623c39a9 100644
--- a/src/compiler/glsl/lower_instructions.cpp
+++ b/src/compiler/glsl/lower_instructions.cpp
@@ -43,6 +43,7 @@
 #define FIND_LSB_TO_FLOAT_CAST 0x20000
 #define FIND_MSB_TO_FLOAT_CAST 0x40000
 #define IMUL_HIGH_TO_MUL 0x80000
+#define SQRT_TO_ABS_SQRT 0x200000
 
 using namespace ir_builder;
 
@@ -65,6 +66,7 @@ private:
    void find_lsb_to_float_cast(ir_expression *ir);
    void find_msb_to_float_cast(ir_expression *ir);
    void imul_high_to_mul(ir_expression *ir);
+   void sqrt_to_abs_sqrt(ir_expression *ir);
 
    ir_expression *_carry(operand a, operand b);
 
@@ -82,9 +84,11 @@ private:
 #define lowering(x) (this->lower & x)
 
 bool
-lower_instructions(exec_list *instructions,bool have_gpu_shader5)
+lower_instructions(exec_list *instructions, bool force_abs_sqrt,
+                   bool have_gpu_shader5)
 {
    unsigned what_to_lower =
+      (force_abs_sqrt ? SQRT_TO_ABS_SQRT : 0) |
       /* Assume that if ARB_gpu_shader5 is not supported then all of the
        * extended integer functions need lowering. It may be necessary to add
        * some caps for individual instructions.
@@ -489,6 +493,27 @@ lower_instructions_visitor::imul_high_to_mul(ir_expression *ir)
    }
 }
 
+/**
+ * Wrap the operand of a sqrt/rsq expression in abs().
+ *
+ * Called from visit_leave() for ir_unop_sqrt and ir_unop_rsq when the
+ * SQRT_TO_ABS_SQRT lowering is enabled (force_abs_sqrt).
+ */
+void
+lower_instructions_visitor::sqrt_to_abs_sqrt(ir_expression *ir)
+{
+   /* An operand that is already abs() needs no rewrite. Skipping it keeps
+    * the pass idempotent: otherwise every run would nest another abs() and
+    * report progress again.
+    */
+   ir_expression *const operand = ir->operands[0]->as_expression();
+   if (operand != NULL && operand->operation == ir_unop_abs)
+      return;
+
+   ir->operands[0] = new(ir) ir_expression(ir_unop_abs, ir->operands[0]);
+   this->progress = true;
+}
+
 ir_visitor_status
 lower_instructions_visitor::visit_leave(ir_expression *ir)
 {
@@ -517,6 +542,12 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
          imul_high_to_mul(ir);
       break;
 
+   case ir_unop_rsq:
+   case ir_unop_sqrt:
+      if (lowering(SQRT_TO_ABS_SQRT))
+         sqrt_to_abs_sqrt(ir);
+      break;
+
    default:
       return visit_continue;
    }
diff --git a/src/compiler/glsl/test_optpass.cpp b/src/compiler/glsl/test_optpass.cpp
index a6287f33a68..676d690d9a0 100644
--- a/src/compiler/glsl/test_optpass.cpp
+++ b/src/compiler/glsl/test_optpass.cpp
@@ -87,7 +87,7 @@ do_optimization(struct exec_list *ir, const char *optimization,
       return do_vec_index_to_cond_assign(ir);
    } else if (sscanf(optimization, "lower_instructions ( %d ) ",
                      &int_0) == 1) {
-      return lower_instructions(ir, false);
+      return lower_instructions(ir, false, false);
    } else {
       printf("Unrecognized optimization %s\n", optimization);
       exit(EXIT_FAILURE);