nir/lower_idiv: add new llvm-based path
v2: make variable names snake_case
v2: minor cleanups in emit_udiv()
v2: fix Panfrost build failure
v3: use an enum instead of a boolean flag in nir_lower_idiv()'s signature
v4: remove nir_op_urcp
v5: drop nv50 path
v5: rebase
v6: add back nv50 path
v6: add comment for nir_lower_idiv_path enum
v7: rename _nv50/_llvm to _fast/_precise
v8: fix etnaviv build failure

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
This commit is contained in:
@@ -3917,7 +3917,19 @@ enum nir_lower_non_uniform_access_type {
|
||||
bool nir_lower_non_uniform_access(nir_shader *shader,
|
||||
enum nir_lower_non_uniform_access_type);
|
||||
|
||||
bool nir_lower_idiv(nir_shader *shader);
|
||||
/* Lowering strategy passed to nir_lower_idiv() as its `path` argument. */
enum nir_lower_idiv_path {
|
||||
/* This path is based on NV50LegalizeSSA::handleDIV(). It is the faster of
|
||||
* the two but it is not exact in some cases (for example, 1091317713u /
|
||||
* 1034u gives 5209173 instead of 1055432) */
|
||||
nir_lower_idiv_fast,
|
||||
/* This path is based on AMDGPUTargetLowering::LowerUDIVREM() and
|
||||
* AMDGPUTargetLowering::LowerSDIVREM(). It requires more instructions than
|
||||
* the fast path and many of them are integer multiplications, so it is
|
||||
* probably slower. It should always return the correct result, though. */
|
||||
nir_lower_idiv_precise,
|
||||
};
|
||||
|
||||
bool nir_lower_idiv(nir_shader *shader, enum nir_lower_idiv_path path);
|
||||
|
||||
bool nir_lower_input_attachments(nir_shader *shader, bool use_fragcoord_sysval);
|
||||
|
||||
|
Reference in New Issue
Block a user