nir/lower_idiv: add options to use fp32 for 8-bit division lowering

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10081>
This commit is contained in:
Rhys Perry
2021-04-07 19:17:46 +01:00
committed by Marge Bot
parent 7db8d307bc
commit a2619b97f5
11 changed files with 74 additions and 32 deletions

View File

@@ -4855,19 +4855,26 @@ enum nir_lower_non_uniform_access_type {
bool nir_lower_non_uniform_access(nir_shader *shader,
enum nir_lower_non_uniform_access_type);
/* Selects which lowering nir_lower_idiv() applies to integer divisions whose
 * operands are not narrower than 32 bits; narrower operands go through
 * convert_instr_small() regardless of this value. */
enum nir_lower_idiv_path {
/* This path is based on NV50LegalizeSSA::handleDIV(). It is the faster of
* the two but it is not exact in some cases (for example, 1091317713u /
* 1034u gives 5209173 instead of 1055432) */
nir_lower_idiv_fast,
/* This path is based on AMDGPUTargetLowering::LowerUDIVREM() and
* AMDGPUTargetLowering::LowerSDIVREM(). It requires more instructions than
* the nv50 path and many of them are integer multiplications, so it is
* probably slower. It should always return the correct result, though. */
nir_lower_idiv_precise,
};
/* Options controlling how nir_lower_idiv() lowers integer division. */
typedef struct {
/* If true, a 32-bit division lowering based on NV50LegalizeSSA::handleDIV()
* is used. It is the faster of the two but it is not exact in some cases
* (for example, 1091317713u / 1034u gives 5209173 instead of 1055432).
*
* If false, a lowering based on AMDGPUTargetLowering::LowerUDIVREM() and
* AMDGPUTargetLowering::LowerSDIVREM() is used. It requires more
* instructions than the nv50 path and many of them are integer
* multiplications, so it is probably slower. It should always return the
* correct result, though.
*/
bool imprecise_32bit_lowering;
/* NOTE(review): the prototype on the next line takes the enum-based `path`
 * argument rather than this options struct. It looks like the superseded
 * declaration carried over by the diff rendering, not a struct member --
 * confirm against the real header.
 */
bool nir_lower_idiv(nir_shader *shader, enum nir_lower_idiv_path path);
/* Whether 16-bit floating point arithmetic should be allowed in 8-bit
* division lowering. When false, the lowering for operands narrower than
* 32 bits converts them to 32-bit floats instead.
*/
bool allow_fp16;
} nir_lower_idiv_options;
bool nir_lower_idiv(nir_shader *shader, const nir_lower_idiv_options *options);
typedef struct nir_input_attachment_options {
bool use_fragcoord_sysval;

View File

@@ -200,11 +200,12 @@ convert_instr_precise(nir_builder *bld, nir_op op,
static nir_ssa_def *
convert_instr_small(nir_builder *b, nir_op op,
nir_ssa_def *numer, nir_ssa_def *denom)
nir_ssa_def *numer, nir_ssa_def *denom,
const nir_lower_idiv_options *options)
{
unsigned sz = numer->bit_size;
nir_alu_type int_type = nir_op_infos[op].output_type | sz;
nir_alu_type float_type = nir_type_float | (sz * 2);
nir_alu_type float_type = nir_type_float | (options->allow_fp16 ? sz * 2 : 32);
nir_ssa_def *p = nir_type_convert(b, numer, int_type, float_type);
nir_ssa_def *q = nir_type_convert(b, denom, int_type, float_type);
@@ -240,18 +241,18 @@ convert_instr_small(nir_builder *b, nir_op op,
/* Per-instruction callback that nir_lower_idiv() hands to
 * nir_shader_lower_instructions(): picks a division lowering for one ALU
 * instruction and returns the value produced by that lowering.
 *
 * b      builder forwarded to the chosen lowering helper.
 * instr  instruction to lower; it is converted with nir_instr_as_alu().
 * _data  opaque pointer supplied by nir_lower_idiv().
 *
 * NOTE(review): this span appears to show both the enum-based (`path`) and
 * the options-struct-based lines of the same function back to back, as in a
 * rendered diff without +/- markers -- confirm which lines are live before
 * relying on it.
 */
static nir_ssa_def *
lower_idiv(nir_builder *b, nir_instr *instr, void *_data)
{
enum nir_lower_idiv_path *path = _data;
const nir_lower_idiv_options *options = _data;
nir_alu_instr *alu = nir_instr_as_alu(instr);
/* ALU source 0 is used as the numerator, source 1 as the denominator. */
nir_ssa_def *numer = nir_ssa_for_alu_src(b, alu, 0);
nir_ssa_def *denom = nir_ssa_for_alu_src(b, alu, 1);
/* Operands narrower than 32 bits always take the float-conversion based
 * convert_instr_small() path. Otherwise the caller's setting chooses between
 * convert_instr() (the imprecise lowering) and convert_instr_precise(). */
if (numer->bit_size < 32)
return convert_instr_small(b, alu->op, numer, denom);
else if (*path == nir_lower_idiv_precise)
return convert_instr_precise(b, alu->op, numer, denom);
else
return convert_instr_small(b, alu->op, numer, denom, options);
else if (options->imprecise_32bit_lowering)
return convert_instr(b, alu->op, numer, denom);
else
return convert_instr_precise(b, alu->op, numer, denom);
}
static bool
@@ -278,10 +279,10 @@ inst_is_idiv(const nir_instr *instr, UNUSED const void *_state)
}
/* Entry point of the integer-division lowering pass.
 *
 * Hands inst_is_idiv and lower_idiv to nir_shader_lower_instructions()
 * together with the caller's configuration, and returns that function's
 * result unchanged. (Presumably inst_is_idiv acts as the instruction filter
 * and lower_idiv as the rewrite callback -- confirm against the declaration
 * of nir_shader_lower_instructions().)
 *
 * NOTE(review): both the enum-based and the options-based signature, and both
 * forms of the final argument, are present below; this looks like a rendered
 * diff without +/- markers -- confirm which lines are live.
 */
bool
nir_lower_idiv(nir_shader *shader, enum nir_lower_idiv_path path)
nir_lower_idiv(nir_shader *shader, const nir_lower_idiv_options *options)
{
return nir_shader_lower_instructions(shader,
inst_is_idiv,
lower_idiv,
&path);
/* The cast drops the const qualifier: lower_idiv() receives this pointer as
 * a plain void *_data and reads it back through a const pointer. */
(void *)options);
}