nir/lower_idiv: add options to use fp32 for 8-bit division lowering
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10081>
This commit is contained in:
@@ -3319,7 +3319,11 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
|
|||||||
/* TODO: Implement nir_op_uadd_sat with LLVM. */
|
/* TODO: Implement nir_op_uadd_sat with LLVM. */
|
||||||
if (!radv_use_llvm_for_stage(device, i))
|
if (!radv_use_llvm_for_stage(device, i))
|
||||||
nir_opt_idiv_const(nir[i], 8);
|
nir_opt_idiv_const(nir[i], 8);
|
||||||
nir_lower_idiv(nir[i], nir_lower_idiv_precise);
|
|
||||||
|
nir_lower_idiv(nir[i], &(nir_lower_idiv_options){
|
||||||
|
.imprecise_32bit_lowering = false,
|
||||||
|
.allow_fp16 = true,
|
||||||
|
});
|
||||||
|
|
||||||
nir_opt_sink(nir[i], nir_move_load_input | nir_move_const_undef | nir_move_copies);
|
nir_opt_sink(nir[i], nir_move_load_input | nir_move_const_undef | nir_move_copies);
|
||||||
nir_opt_move(nir[i], nir_move_load_input | nir_move_const_undef | nir_move_copies);
|
nir_opt_move(nir[i], nir_move_load_input | nir_move_const_undef | nir_move_copies);
|
||||||
|
@@ -1397,7 +1397,11 @@ v3d_attempt_compile(struct v3d_compile *c)
|
|||||||
NIR_PASS_V(c->s, v3d_nir_lower_io, c);
|
NIR_PASS_V(c->s, v3d_nir_lower_io, c);
|
||||||
NIR_PASS_V(c->s, v3d_nir_lower_txf_ms, c);
|
NIR_PASS_V(c->s, v3d_nir_lower_txf_ms, c);
|
||||||
NIR_PASS_V(c->s, v3d_nir_lower_image_load_store);
|
NIR_PASS_V(c->s, v3d_nir_lower_image_load_store);
|
||||||
NIR_PASS_V(c->s, nir_lower_idiv, nir_lower_idiv_fast);
|
nir_lower_idiv_options idiv_options = {
|
||||||
|
.imprecise_32bit_lowering = true,
|
||||||
|
.allow_fp16 = true,
|
||||||
|
};
|
||||||
|
NIR_PASS_V(c->s, nir_lower_idiv, &idiv_options);
|
||||||
|
|
||||||
if (c->key->robust_buffer_access) {
|
if (c->key->robust_buffer_access) {
|
||||||
/* v3d_nir_lower_robust_buffer_access assumes constant buffer
|
/* v3d_nir_lower_robust_buffer_access assumes constant buffer
|
||||||
|
@@ -4855,19 +4855,26 @@ enum nir_lower_non_uniform_access_type {
|
|||||||
bool nir_lower_non_uniform_access(nir_shader *shader,
|
bool nir_lower_non_uniform_access(nir_shader *shader,
|
||||||
enum nir_lower_non_uniform_access_type);
|
enum nir_lower_non_uniform_access_type);
|
||||||
|
|
||||||
enum nir_lower_idiv_path {
|
typedef struct {
|
||||||
/* This path is based on NV50LegalizeSSA::handleDIV(). It is the faster of
|
/* If true, a 32-bit division lowering based on NV50LegalizeSSA::handleDIV()
|
||||||
* the two but it is not exact in some cases (for example, 1091317713u /
|
* is used. It is the faster of the two but it is not exact in some cases
|
||||||
* 1034u gives 5209173 instead of 1055432) */
|
* (for example, 1091317713u / 1034u gives 5209173 instead of 1055432).
|
||||||
nir_lower_idiv_fast,
|
*
|
||||||
/* This path is based on AMDGPUTargetLowering::LowerUDIVREM() and
|
* If false, a lowering based on AMDGPUTargetLowering::LowerUDIVREM() and
|
||||||
* AMDGPUTargetLowering::LowerSDIVREM(). It requires more instructions than
|
* AMDGPUTargetLowering::LowerSDIVREM() is used. It requires more
|
||||||
* the nv50 path and many of them are integer multiplications, so it is
|
* instructions than the nv50 path and many of them are integer
|
||||||
* probably slower. It should always return the correct result, though. */
|
* multiplications, so it is probably slower. It should always return the
|
||||||
nir_lower_idiv_precise,
|
* correct result, though.
|
||||||
};
|
*/
|
||||||
|
bool imprecise_32bit_lowering;
|
||||||
|
|
||||||
bool nir_lower_idiv(nir_shader *shader, enum nir_lower_idiv_path path);
|
/* Whether 16-bit floating point arithmetic should be allowed in 8-bit
|
||||||
|
* division lowering
|
||||||
|
*/
|
||||||
|
bool allow_fp16;
|
||||||
|
} nir_lower_idiv_options;
|
||||||
|
|
||||||
|
bool nir_lower_idiv(nir_shader *shader, const nir_lower_idiv_options *options);
|
||||||
|
|
||||||
typedef struct nir_input_attachment_options {
|
typedef struct nir_input_attachment_options {
|
||||||
bool use_fragcoord_sysval;
|
bool use_fragcoord_sysval;
|
||||||
|
@@ -200,11 +200,12 @@ convert_instr_precise(nir_builder *bld, nir_op op,
|
|||||||
|
|
||||||
static nir_ssa_def *
|
static nir_ssa_def *
|
||||||
convert_instr_small(nir_builder *b, nir_op op,
|
convert_instr_small(nir_builder *b, nir_op op,
|
||||||
nir_ssa_def *numer, nir_ssa_def *denom)
|
nir_ssa_def *numer, nir_ssa_def *denom,
|
||||||
|
const nir_lower_idiv_options *options)
|
||||||
{
|
{
|
||||||
unsigned sz = numer->bit_size;
|
unsigned sz = numer->bit_size;
|
||||||
nir_alu_type int_type = nir_op_infos[op].output_type | sz;
|
nir_alu_type int_type = nir_op_infos[op].output_type | sz;
|
||||||
nir_alu_type float_type = nir_type_float | (sz * 2);
|
nir_alu_type float_type = nir_type_float | (options->allow_fp16 ? sz * 2 : 32);
|
||||||
|
|
||||||
nir_ssa_def *p = nir_type_convert(b, numer, int_type, float_type);
|
nir_ssa_def *p = nir_type_convert(b, numer, int_type, float_type);
|
||||||
nir_ssa_def *q = nir_type_convert(b, denom, int_type, float_type);
|
nir_ssa_def *q = nir_type_convert(b, denom, int_type, float_type);
|
||||||
@@ -240,18 +241,18 @@ convert_instr_small(nir_builder *b, nir_op op,
|
|||||||
static nir_ssa_def *
|
static nir_ssa_def *
|
||||||
lower_idiv(nir_builder *b, nir_instr *instr, void *_data)
|
lower_idiv(nir_builder *b, nir_instr *instr, void *_data)
|
||||||
{
|
{
|
||||||
enum nir_lower_idiv_path *path = _data;
|
const nir_lower_idiv_options *options = _data;
|
||||||
nir_alu_instr *alu = nir_instr_as_alu(instr);
|
nir_alu_instr *alu = nir_instr_as_alu(instr);
|
||||||
|
|
||||||
nir_ssa_def *numer = nir_ssa_for_alu_src(b, alu, 0);
|
nir_ssa_def *numer = nir_ssa_for_alu_src(b, alu, 0);
|
||||||
nir_ssa_def *denom = nir_ssa_for_alu_src(b, alu, 1);
|
nir_ssa_def *denom = nir_ssa_for_alu_src(b, alu, 1);
|
||||||
|
|
||||||
if (numer->bit_size < 32)
|
if (numer->bit_size < 32)
|
||||||
return convert_instr_small(b, alu->op, numer, denom);
|
return convert_instr_small(b, alu->op, numer, denom, options);
|
||||||
else if (*path == nir_lower_idiv_precise)
|
else if (options->imprecise_32bit_lowering)
|
||||||
return convert_instr_precise(b, alu->op, numer, denom);
|
|
||||||
else
|
|
||||||
return convert_instr(b, alu->op, numer, denom);
|
return convert_instr(b, alu->op, numer, denom);
|
||||||
|
else
|
||||||
|
return convert_instr_precise(b, alu->op, numer, denom);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
@@ -278,10 +279,10 @@ inst_is_idiv(const nir_instr *instr, UNUSED const void *_state)
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
nir_lower_idiv(nir_shader *shader, enum nir_lower_idiv_path path)
|
nir_lower_idiv(nir_shader *shader, const nir_lower_idiv_options *options)
|
||||||
{
|
{
|
||||||
return nir_shader_lower_instructions(shader,
|
return nir_shader_lower_instructions(shader,
|
||||||
inst_is_idiv,
|
inst_is_idiv,
|
||||||
lower_idiv,
|
lower_idiv,
|
||||||
&path);
|
(void *)options);
|
||||||
}
|
}
|
||||||
|
@@ -320,7 +320,11 @@ ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s)
|
|||||||
/* do idiv lowering after first opt loop to get a chance to propagate
|
/* do idiv lowering after first opt loop to get a chance to propagate
|
||||||
* constants for divide by immed power-of-two:
|
* constants for divide by immed power-of-two:
|
||||||
*/
|
*/
|
||||||
const bool idiv_progress = OPT(s, nir_lower_idiv, nir_lower_idiv_fast);
|
nir_lower_idiv_options idiv_options = {
|
||||||
|
.imprecise_32bit_lowering = true,
|
||||||
|
.allow_fp16 = true,
|
||||||
|
};
|
||||||
|
const bool idiv_progress = OPT(s, nir_lower_idiv, &idiv_options);
|
||||||
|
|
||||||
if (idiv_progress)
|
if (idiv_progress)
|
||||||
ir3_optimize_loop(s);
|
ir3_optimize_loop(s);
|
||||||
|
@@ -1113,7 +1113,11 @@ etna_compile_shader_nir(struct etna_shader_variant *v)
|
|||||||
NIR_PASS_V(s, nir_lower_indirect_derefs, nir_var_all, UINT32_MAX);
|
NIR_PASS_V(s, nir_lower_indirect_derefs, nir_var_all, UINT32_MAX);
|
||||||
NIR_PASS_V(s, nir_lower_tex, &(struct nir_lower_tex_options) { .lower_txp = ~0u });
|
NIR_PASS_V(s, nir_lower_tex, &(struct nir_lower_tex_options) { .lower_txp = ~0u });
|
||||||
NIR_PASS_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, specs);
|
NIR_PASS_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, specs);
|
||||||
NIR_PASS_V(s, nir_lower_idiv, nir_lower_idiv_fast);
|
nir_lower_idiv_options idiv_options = {
|
||||||
|
.imprecise_32bit_lowering = true,
|
||||||
|
.allow_fp16 = true,
|
||||||
|
};
|
||||||
|
NIR_PASS_V(s, nir_lower_idiv, &idiv_options);
|
||||||
|
|
||||||
etna_optimize_loop(s);
|
etna_optimize_loop(s);
|
||||||
|
|
||||||
|
@@ -3139,7 +3139,11 @@ Converter::run()
|
|||||||
/*TODO: improve this lowering/optimisation loop so that we can use
|
/*TODO: improve this lowering/optimisation loop so that we can use
|
||||||
* nir_opt_idiv_const effectively before this.
|
* nir_opt_idiv_const effectively before this.
|
||||||
*/
|
*/
|
||||||
NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_precise);
|
nir_lower_idiv_options idiv_options = {
|
||||||
|
.imprecise_32bit_lowering = false,
|
||||||
|
.allow_fp16 = true,
|
||||||
|
};
|
||||||
|
NIR_PASS(progress, nir, nir_lower_idiv, &idiv_options);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
progress = false;
|
progress = false;
|
||||||
|
@@ -863,9 +863,11 @@ int r600_shader_from_nir(struct r600_context *rctx,
|
|||||||
|
|
||||||
NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa);
|
NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa);
|
||||||
NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
|
NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
|
||||||
NIR_PASS_V(sel->nir, nir_lower_idiv,
|
nir_lower_idiv_options idiv_options = {
|
||||||
sel->nir->info.stage == MESA_SHADER_COMPUTE ?
|
.imprecise_32bit_lowering = sel->nir->info.stage != MESA_SHADER_COMPUTE,
|
||||||
nir_lower_idiv_precise : nir_lower_idiv_fast);
|
.allow_fp16 = true,
|
||||||
|
};
|
||||||
|
NIR_PASS_V(sel->nir, nir_lower_idiv, &idiv_options);
|
||||||
NIR_PASS_V(sel->nir, r600_lower_alu);
|
NIR_PASS_V(sel->nir, r600_lower_alu);
|
||||||
NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
|
NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
|
||||||
|
|
||||||
|
@@ -2316,7 +2316,11 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
|
|||||||
|
|
||||||
NIR_PASS_V(c->s, vc4_nir_lower_io, c);
|
NIR_PASS_V(c->s, vc4_nir_lower_io, c);
|
||||||
NIR_PASS_V(c->s, vc4_nir_lower_txf_ms, c);
|
NIR_PASS_V(c->s, vc4_nir_lower_txf_ms, c);
|
||||||
NIR_PASS_V(c->s, nir_lower_idiv, nir_lower_idiv_fast);
|
nir_lower_idiv_options idiv_options = {
|
||||||
|
.imprecise_32bit_lowering = true,
|
||||||
|
.allow_fp16 = true,
|
||||||
|
};
|
||||||
|
NIR_PASS_V(c->s, nir_lower_idiv, &idiv_options);
|
||||||
|
|
||||||
vc4_optimize_nir(c->s);
|
vc4_optimize_nir(c->s);
|
||||||
|
|
||||||
|
@@ -2834,7 +2834,11 @@ bi_optimize_nir(nir_shader *nir)
|
|||||||
|
|
||||||
NIR_PASS(progress, nir, nir_lower_int64);
|
NIR_PASS(progress, nir, nir_lower_int64);
|
||||||
|
|
||||||
NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_fast);
|
nir_lower_idiv_options idiv_options = {
|
||||||
|
.imprecise_32bit_lowering = true,
|
||||||
|
.allow_fp16 = true,
|
||||||
|
};
|
||||||
|
NIR_PASS(progress, nir, nir_lower_idiv, &idiv_options);
|
||||||
|
|
||||||
NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options);
|
NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options);
|
||||||
NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);
|
NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);
|
||||||
|
@@ -296,7 +296,11 @@ optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend)
|
|||||||
(nir->options->lower_flrp64 ? 64 : 0);
|
(nir->options->lower_flrp64 ? 64 : 0);
|
||||||
|
|
||||||
NIR_PASS(progress, nir, nir_lower_regs_to_ssa);
|
NIR_PASS(progress, nir, nir_lower_regs_to_ssa);
|
||||||
NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_fast);
|
nir_lower_idiv_options idiv_options = {
|
||||||
|
.imprecise_32bit_lowering = true,
|
||||||
|
.allow_fp16 = true,
|
||||||
|
};
|
||||||
|
NIR_PASS(progress, nir, nir_lower_idiv, &idiv_options);
|
||||||
|
|
||||||
nir_lower_tex_options lower_tex_options = {
|
nir_lower_tex_options lower_tex_options = {
|
||||||
.lower_txs_lod = true,
|
.lower_txs_lod = true,
|
||||||
|
Reference in New Issue
Block a user