nir: add amul instruction

Used for address/offset calculation (i.e. array derefs), where we can
potentially use less than 32b for the multiply of the array index by the
element size.  For backends that support `imul24`, this gives a lowering
pass an easy way to find multiplies that can potentially be converted to
`imul24`.
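
As a rough illustration (the variable names and the surrounding pass are
hypothetical, not taken from any particular file in this series), a
deref-lowering pass opts in simply by calling the new builder helper:

    /* Hypothetical sketch: building a byte offset for an array deref.
     * nir_amul_imm() marks the multiply as pure address math so later
     * passes may narrow it; nir_imul_imm() keeps full 32b semantics. */
    nir_ssa_def *index  = nir_ssa_for_src(b, deref->arr.index, 1);
    nir_ssa_def *offset = nir_amul_imm(b, index, elem_size_bytes);
    nir_ssa_def *addr   = nir_iadd(b, base_addr, offset);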

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
Author: Rob Clark
Date:   2019-09-26 10:32:00 -07:00
Commit: 6320e37d4b
Parent: 0568761f8e

7 changed files with 38 additions and 7 deletions

@@ -59,7 +59,7 @@ get_block_array_index(nir_builder *b, nir_deref_instr *deref,
    } else {
       nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
       arr_index = nir_umin(b, arr_index, nir_imm_int(b, arr_size - 1));
-      nir_ssa_def *arr_offset = nir_imul_imm(b, arr_index, array_elements);
+      nir_ssa_def *arr_offset = nir_amul_imm(b, arr_index, array_elements);
       if (nonconst_index)
          nonconst_index = nir_iadd(b, nonconst_index, arr_offset);
       else

@@ -646,7 +646,7 @@ nir_iadd_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
 }
 
 static inline nir_ssa_def *
-nir_imul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
+_nir_mul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y, bool amul)
 {
    assert(x->bit_size <= 64);
    if (x->bit_size < 64)
@@ -658,11 +658,25 @@ nir_imul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
       return x;
    } else if (util_is_power_of_two_or_zero64(y)) {
       return nir_ishl(build, x, nir_imm_int(build, ffsll(y) - 1));
+   } else if (amul) {
+      return nir_amul(build, x, nir_imm_intN_t(build, y, x->bit_size));
    } else {
       return nir_imul(build, x, nir_imm_intN_t(build, y, x->bit_size));
    }
 }
 
+static inline nir_ssa_def *
+nir_imul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
+{
+   return _nir_mul_imm(build, x, y, false);
+}
+
+static inline nir_ssa_def *
+nir_amul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
+{
+   return _nir_mul_imm(build, x, y, true);
+}
+
 static inline nir_ssa_def *
 nir_fadd_imm(nir_builder *build, nir_ssa_def *x, double y)
 {

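The shared helper keeps the existing strength reduction, so an amul is only
emitted for the genuinely non-trivial cases. A small sketch of the resulting
behavior (b and x are assumed to be an existing builder and SSA def):

    nir_amul_imm(b, x, 0);   /* folds to an immediate 0              */
    nir_amul_imm(b, x, 1);   /* returns x unchanged                  */
    nir_amul_imm(b, x, 16);  /* power of two: emitted as ishl(x, 4)  */
    nir_amul_imm(b, x, 12);  /* general case: emits amul x, 12       */
    nir_imul_imm(b, x, 12);  /* amul=false path: emits imul x, 12    */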
@@ -297,7 +297,7 @@ nir_build_deref_offset(nir_builder *b, nir_deref_instr *deref,
       if ((*p)->deref_type == nir_deref_type_array) {
          nir_ssa_def *index = nir_ssa_for_src(b, (*p)->arr.index, 1);
          int stride = type_get_array_stride((*p)->type, size_align);
-         offset = nir_iadd(b, offset, nir_imul_imm(b, index, stride));
+         offset = nir_iadd(b, offset, nir_amul_imm(b, index, stride));
       } else if ((*p)->deref_type == nir_deref_type_struct) {
          /* p starts at path[1], so this is safe */
          nir_deref_instr *parent = *(p - 1);

@@ -206,7 +206,7 @@ get_io_offset(nir_builder *b, nir_deref_instr *deref,
          unsigned size = type_size((*p)->type, bts);
 
          nir_ssa_def *mul =
-            nir_imul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);
+            nir_amul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);
 
          offset = nir_iadd(b, offset, mul);
       } else if ((*p)->deref_type == nir_deref_type_struct) {
@@ -1094,7 +1094,7 @@ nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
       nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
       index = nir_i2i(b, index, base_addr->bit_size);
       return build_addr_iadd(b, base_addr, addr_format,
-                             nir_imul_imm(b, index, stride));
+                             nir_amul_imm(b, index, stride));
    }
 
    case nir_deref_type_ptr_as_array: {
@@ -1102,7 +1102,7 @@ nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
       index = nir_i2i(b, index, base_addr->bit_size);
       unsigned stride = nir_deref_instr_ptr_as_array_stride(deref);
       return build_addr_iadd(b, base_addr, addr_format,
-                             nir_imul_imm(b, index, stride));
+                             nir_amul_imm(b, index, stride));
    }
 
    case nir_deref_type_array_wildcard:

@@ -319,7 +319,7 @@ build_array_index(nir_builder *b, nir_deref_instr *deref, nir_ssa_def *base,
                                      deref->dest.ssa.bit_size);
       return nir_iadd(
          b, build_array_index(b, nir_deref_instr_parent(deref), base, vs_in),
-         nir_imul_imm(b, index, glsl_count_attribute_slots(deref->type, vs_in)));
+         nir_amul_imm(b, index, glsl_count_attribute_slots(deref->type, vs_in)));
    }
    default:
       unreachable("Invalid deref instruction type");

@@ -1047,6 +1047,18 @@ dst.z = src2.x;
 dst.w = src3.x;
 """)
 
+# An integer multiply instruction for address calculation.  This is
+# similar to imul, except that the results are undefined in case of
+# overflow.  Overflow is defined according to the size of the variable
+# being dereferenced.
+#
+# This relaxed definition, compared to imul, allows an optimization
+# pass to propagate bounds (i.e. from a load/store intrinsic) to the
+# sources, such that lower-precision integer multiplies can be used.
+# This is useful on hw that has 24b or perhaps 16b integer multiply
+# instructions.
+binop("amul", tint, _2src_commutative + associative, "src0 * src1")
+
 # ir3-specific instruction that maps directly to mul-add shift high mix,
 # (IMADSH_MIX16 i.e. ah * bl << 16 + c). It is used for lowering integer
 # multiplication (imul) on Freedreno backend..

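To make the relaxed overflow rule concrete, here is a hedged numeric sketch
(the array size and stride are made up, and imul24 refers to a backend
instruction, not something introduced by this patch):

    /* uniform Block { vec4 arr[256]; };  =>  idx is bounded to [0, 255]
     * offset = amul(idx, 16)             =>  at most 255 * 16 = 4080
     *
     * Every in-bounds result fits in 24 bits, so a hardware 24-bit
     * multiply yields the same value as a full 32b imul for any index
     * that stays inside the dereferenced variable; results for
     * out-of-bounds indices are allowed to differ, which is exactly
     * what amul's undefined-overflow rule permits. */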
@@ -1112,6 +1112,11 @@ optimizations.extend([
    (('isign', a), ('imin', ('imax', a, -1), 1), 'options->lower_isign'),
    (('fsign', a), ('fsub', ('b2f', ('flt', 0.0, a)), ('b2f', ('flt', a, 0.0))), 'options->lower_fsign'),
 
+   # Address/offset calculations:
+   # For now, unconditionally convert amul to imul; this will
+   # change in the following patch.
+   (('amul', a, b), ('imul', a, b)),
+
 ])
 
 # bit_size dependent lowerings