nir: Add umad24 and umul24 opcodes
So far only the signed versions are defined. v2: Make umad24 and umul24 non-driver specific (Eric Anholt) v3: Take care of nir_builder and automatic lowering of the opcodes if they are not supported by the backend. Signed-off-by: Gert Wollny <gert.wollny@collabora.com> Reviewed-by: Eric Anholt <eric@anholt.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4610>
This commit is contained in:
@@ -2996,6 +2996,14 @@ typedef struct nir_shader_compiler_options {
|
||||
*/
|
||||
bool has_imul24;
|
||||
|
||||
/** Backend supports umul24. If not set, umul24 will automatically be
 * lowered to imul with masked inputs. */
|
||||
bool has_umul24;
|
||||
|
||||
/** Backend supports umad24. If not set, umad24 will automatically be
 * lowered to imul with masked inputs, followed by iadd. */
|
||||
bool has_umad24;
|
||||
|
||||
/* Whether to generate only scoped_memory_barrier intrinsics instead of the
|
||||
* set of memory barrier intrinsics based on GLSL.
|
||||
*/
|
||||
|
@@ -100,22 +100,6 @@ nir_flog(nir_builder *b, nir_ssa_def *x)
|
||||
return nir_fmul_imm(b, nir_flog2(b, x), 1.0 / M_LOG2E);
|
||||
}
|
||||
|
||||
static inline nir_ssa_def *
|
||||
nir_umul24(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
|
||||
{
|
||||
nir_ssa_def *mask = nir_imm_int(b, 0xffffff);
|
||||
nir_ssa_def *x_24 = nir_iand(b, x, mask);
|
||||
nir_ssa_def *y_24 = nir_iand(b, y, mask);
|
||||
return nir_imul(b, x_24, y_24);
|
||||
}
|
||||
|
||||
static inline nir_ssa_def *
|
||||
nir_umad24(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z)
|
||||
{
|
||||
nir_ssa_def *temp = nir_umul24(b, x, y);
|
||||
return nir_iadd(b, temp, z);
|
||||
}
|
||||
|
||||
static inline nir_ssa_def *
|
||||
nir_imad24(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z)
|
||||
{
|
||||
|
@@ -1137,3 +1137,11 @@ triop("imad24_ir3", tint32, _2src_commutative,
|
||||
# 24b multiply into 32b result (with sign extension).
# The "<< 8 >> 8" pair on int32_t sign-extends each source from its low 24 bits
# before the multiply.
binop("imul24", tint32, _2src_commutative + associative,
      "(((int32_t)src0 << 8) >> 8) * (((int32_t)src1 << 8) >> 8)")

# unsigned 24b multiply into 32b result plus 32b int.
# The "<< 8 >> 8" pair on uint32_t clears the top 8 bits of src0 and src1;
# src2 is added unmasked.
triop("umad24", tuint32, _2src_commutative,
      "(((uint32_t)src0 << 8) >> 8) * (((uint32_t)src1 << 8) >> 8) + src2")

# unsigned 24b multiply into 32b result uint.
# Declared as tuint32 to match the unsigned expression below and the umad24
# opcode above.
# NOTE(review): a multiply of masked inputs is not associative in general
# (e.g. 0x1000 * 0x1000 overflows 24 bits before the next multiply masks it);
# confirm that the 'associative' flag is intended here and on imul24.
binop("umul24", tuint32, _2src_commutative + associative,
      "(((uint32_t)src0 << 8) >> 8) * (((uint32_t)src1 << 8) >> 8)")
|
||||
|
@@ -1313,6 +1313,13 @@ optimizations.extend([
|
||||
# rule converts everyone else to imul:
|
||||
(('amul', a, b), ('imul', a, b), '!options->has_imul24'),
|
||||
|
||||
(('umul24', a, b),
|
||||
('imul', ('iand', a, 0xffffff), ('iand', b, 0xffffff)),
|
||||
'!options->has_umul24'),
|
||||
(('umad24', a, b, c),
|
||||
('iadd', ('imul', ('iand', a, 0xffffff), ('iand', b, 0xffffff)), c),
|
||||
'!options->has_umad24'),
|
||||
|
||||
(('imad24_ir3', a, b, 0), ('imul24', a, b)),
|
||||
(('imad24_ir3', a, 0, c), (c)),
|
||||
(('imad24_ir3', a, 1, c), ('iadd', a, c)),
|
||||
|
Reference in New Issue
Block a user