nir: define behavior of nir_op_bfm and nir_op_u/ibfe according to SM5 spec.
That is: the five least significant bits provide the values of 'bits' and 'offset' which is the case for all hardware currently supported by NIR and using the bfm/bfe instructions. This patch also changes the lowering of bitfield_insert/extract using shifts to not use bfm and removes the flag 'lower_bfm'. Tested-by: Eric Anholt <eric@anholt.net> Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
This commit is contained in:

committed by
Daniel Schürmann

parent
a74f256c58
commit
165b7f3a44
@@ -2392,7 +2392,6 @@ const nir_shader_compiler_options v3d_nir_options = {
|
|||||||
.lower_all_io_to_temps = true,
|
.lower_all_io_to_temps = true,
|
||||||
.lower_extract_byte = true,
|
.lower_extract_byte = true,
|
||||||
.lower_extract_word = true,
|
.lower_extract_word = true,
|
||||||
.lower_bfm = true,
|
|
||||||
.lower_bitfield_insert_to_shifts = true,
|
.lower_bitfield_insert_to_shifts = true,
|
||||||
.lower_bitfield_extract_to_shifts = true,
|
.lower_bitfield_extract_to_shifts = true,
|
||||||
.lower_bitfield_reverse = true,
|
.lower_bitfield_reverse = true,
|
||||||
|
@@ -2282,18 +2282,16 @@ typedef struct nir_shader_compiler_options {
|
|||||||
bool lower_fmod;
|
bool lower_fmod;
|
||||||
/** Lowers ibitfield_extract/ubitfield_extract to ibfe/ubfe. */
|
/** Lowers ibitfield_extract/ubitfield_extract to ibfe/ubfe. */
|
||||||
bool lower_bitfield_extract;
|
bool lower_bitfield_extract;
|
||||||
/** Lowers ibitfield_extract/ubitfield_extract to bfm, compares, shifts. */
|
/** Lowers ibitfield_extract/ubitfield_extract to compares, shifts. */
|
||||||
bool lower_bitfield_extract_to_shifts;
|
bool lower_bitfield_extract_to_shifts;
|
||||||
/** Lowers bitfield_insert to bfi/bfm */
|
/** Lowers bitfield_insert to bfi/bfm */
|
||||||
bool lower_bitfield_insert;
|
bool lower_bitfield_insert;
|
||||||
/** Lowers bitfield_insert to bfm, compares, and shifts. */
|
/** Lowers bitfield_insert to compares, and shifts. */
|
||||||
bool lower_bitfield_insert_to_shifts;
|
bool lower_bitfield_insert_to_shifts;
|
||||||
/** Lowers bitfield_reverse to shifts. */
|
/** Lowers bitfield_reverse to shifts. */
|
||||||
bool lower_bitfield_reverse;
|
bool lower_bitfield_reverse;
|
||||||
/** Lowers bit_count to shifts. */
|
/** Lowers bit_count to shifts. */
|
||||||
bool lower_bit_count;
|
bool lower_bit_count;
|
||||||
/** Lowers bfm to shifts and subtracts. */
|
|
||||||
bool lower_bfm;
|
|
||||||
/** Lowers ifind_msb to compare and ufind_msb */
|
/** Lowers ifind_msb to compare and ufind_msb */
|
||||||
bool lower_ifind_msb;
|
bool lower_ifind_msb;
|
||||||
/** Lowers find_lsb to ufind_msb and logic ops */
|
/** Lowers find_lsb to ufind_msb and logic ops */
|
||||||
|
@@ -787,14 +787,12 @@ binop_convert("pack_32_2x16_split", tuint32, tuint16, "",
|
|||||||
"src0 | ((uint32_t)src1 << 16)")
|
"src0 | ((uint32_t)src1 << 16)")
|
||||||
|
|
||||||
# bfm implements the behavior of the first operation of the SM5 "bfi" assembly
|
# bfm implements the behavior of the first operation of the SM5 "bfi" assembly
|
||||||
# and that of the "bfi1" i965 instruction. That is, it has undefined behavior
|
# and that of the "bfi1" i965 instruction. That is, the bits and offset values
|
||||||
# if either of its arguments are 32.
|
# are from the low five bits of src0 and src1, respectively.
|
||||||
binop_convert("bfm", tuint32, tint32, "", """
|
binop_convert("bfm", tuint32, tint32, "", """
|
||||||
int bits = src0, offset = src1;
|
int bits = src0 & 0x1F;
|
||||||
if (offset < 0 || bits < 0 || offset > 31 || bits > 31 || offset + bits > 32)
|
int offset = src1 & 0x1F;
|
||||||
dst = 0; /* undefined */
|
dst = ((1u << bits) - 1) << offset;
|
||||||
else
|
|
||||||
dst = ((1u << bits) - 1) << offset;
|
|
||||||
""")
|
""")
|
||||||
|
|
||||||
opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint32], False, "", """
|
opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint32], False, "", """
|
||||||
@@ -873,15 +871,14 @@ if (mask == 0) {
|
|||||||
}
|
}
|
||||||
""")
|
""")
|
||||||
|
|
||||||
# SM5 ubfe/ibfe assembly
|
# SM5 ubfe/ibfe assembly: only the 5 least significant bits of offset and bits are used.
|
||||||
opcode("ubfe", 0, tuint32,
|
opcode("ubfe", 0, tuint32,
|
||||||
[0, 0, 0], [tuint32, tint32, tint32], False, "", """
|
[0, 0, 0], [tuint32, tuint32, tuint32], False, "", """
|
||||||
unsigned base = src0;
|
unsigned base = src0;
|
||||||
int offset = src1, bits = src2;
|
unsigned offset = src1 & 0x1F;
|
||||||
|
unsigned bits = src2 & 0x1F;
|
||||||
if (bits == 0) {
|
if (bits == 0) {
|
||||||
dst = 0;
|
dst = 0;
|
||||||
} else if (bits < 0 || offset < 0) {
|
|
||||||
dst = 0; /* undefined */
|
|
||||||
} else if (offset + bits < 32) {
|
} else if (offset + bits < 32) {
|
||||||
dst = (base << (32 - bits - offset)) >> (32 - bits);
|
dst = (base << (32 - bits - offset)) >> (32 - bits);
|
||||||
} else {
|
} else {
|
||||||
@@ -889,13 +886,12 @@ if (bits == 0) {
|
|||||||
}
|
}
|
||||||
""")
|
""")
|
||||||
opcode("ibfe", 0, tint32,
|
opcode("ibfe", 0, tint32,
|
||||||
[0, 0, 0], [tint32, tint32, tint32], False, "", """
|
[0, 0, 0], [tint32, tuint32, tuint32], False, "", """
|
||||||
int base = src0;
|
int base = src0;
|
||||||
int offset = src1, bits = src2;
|
unsigned offset = src1 & 0x1F;
|
||||||
|
unsigned bits = src2 & 0x1F;
|
||||||
if (bits == 0) {
|
if (bits == 0) {
|
||||||
dst = 0;
|
dst = 0;
|
||||||
} else if (bits < 0 || offset < 0) {
|
|
||||||
dst = 0; /* undefined */
|
|
||||||
} else if (offset + bits < 32) {
|
} else if (offset + bits < 32) {
|
||||||
dst = (base << (32 - bits - offset)) >> (32 - bits);
|
dst = (base << (32 - bits - offset)) >> (32 - bits);
|
||||||
} else {
|
} else {
|
||||||
|
@@ -794,18 +794,11 @@ optimizations.extend([
|
|||||||
|
|
||||||
# Alternative lowering that doesn't rely on bfi.
|
# Alternative lowering that doesn't rely on bfi.
|
||||||
(('bitfield_insert', 'base', 'insert', 'offset', 'bits'),
|
(('bitfield_insert', 'base', 'insert', 'offset', 'bits'),
|
||||||
('bcsel', ('ilt', 31, 'bits'),
|
('ior',
|
||||||
'insert',
|
('iand', 'base', ('inot', ('ishl', ('isub', ('ishl', 1, 'bits'), 1), 'offset'))),
|
||||||
('ior',
|
('iand', ('ishl', 'insert', 'offset'), ('ishl', ('isub', ('ishl', 1, 'bits'), 1), 'offset'))),
|
||||||
('iand', 'base', ('inot', ('bfm', 'bits', 'offset'))),
|
|
||||||
('iand', ('ishl', 'insert', 'offset'), ('bfm', 'bits', 'offset')))),
|
|
||||||
'options->lower_bitfield_insert_to_shifts'),
|
'options->lower_bitfield_insert_to_shifts'),
|
||||||
|
|
||||||
# bfm lowering -- note that the NIR opcode is undefined if either arg is 32.
|
|
||||||
(('bfm', 'bits', 'offset'),
|
|
||||||
('ishl', ('isub', ('ishl', 1, 'bits'), 1), 'offset'),
|
|
||||||
'options->lower_bfm'),
|
|
||||||
|
|
||||||
(('ibitfield_extract', 'value', 'offset', 'bits'),
|
(('ibitfield_extract', 'value', 'offset', 'bits'),
|
||||||
('bcsel', ('ilt', 31, 'bits'), 'value',
|
('bcsel', ('ilt', 31, 'bits'), 'value',
|
||||||
('ibfe', 'value', 'offset', 'bits')),
|
('ibfe', 'value', 'offset', 'bits')),
|
||||||
@@ -829,7 +822,7 @@ optimizations.extend([
|
|||||||
('ushr', 'value', 'offset'),
|
('ushr', 'value', 'offset'),
|
||||||
('bcsel', ('ieq', 'bits', 32),
|
('bcsel', ('ieq', 'bits', 32),
|
||||||
0xffffffff,
|
0xffffffff,
|
||||||
('bfm', 'bits', 0))),
|
('isub', ('ishl', 1, 'bits'), 1))),
|
||||||
'options->lower_bitfield_extract_to_shifts'),
|
'options->lower_bitfield_extract_to_shifts'),
|
||||||
|
|
||||||
(('ifind_msb', 'value'),
|
(('ifind_msb', 'value'),
|
||||||
|
@@ -54,7 +54,6 @@ static const nir_shader_compiler_options options = {
|
|||||||
.lower_helper_invocation = true,
|
.lower_helper_invocation = true,
|
||||||
.lower_bitfield_insert_to_shifts = true,
|
.lower_bitfield_insert_to_shifts = true,
|
||||||
.lower_bitfield_extract_to_shifts = true,
|
.lower_bitfield_extract_to_shifts = true,
|
||||||
.lower_bfm = true,
|
|
||||||
.use_interpolated_input_intrinsics = true,
|
.use_interpolated_input_intrinsics = true,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -79,7 +78,6 @@ static const nir_shader_compiler_options options_a6xx = {
|
|||||||
.lower_helper_invocation = true,
|
.lower_helper_invocation = true,
|
||||||
.lower_bitfield_insert_to_shifts = true,
|
.lower_bitfield_insert_to_shifts = true,
|
||||||
.lower_bitfield_extract_to_shifts = true,
|
.lower_bitfield_extract_to_shifts = true,
|
||||||
.lower_bfm = true,
|
|
||||||
.use_interpolated_input_intrinsics = true,
|
.use_interpolated_input_intrinsics = true,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -912,7 +912,6 @@ static const nir_shader_compiler_options nir_options = {
|
|||||||
.lower_bitfield_insert_to_shifts = false,
|
.lower_bitfield_insert_to_shifts = false,
|
||||||
.lower_bitfield_reverse = false,
|
.lower_bitfield_reverse = false,
|
||||||
.lower_bit_count = false,
|
.lower_bit_count = false,
|
||||||
.lower_bfm = false,
|
|
||||||
.lower_ifind_msb = false,
|
.lower_ifind_msb = false,
|
||||||
.lower_find_lsb = false,
|
.lower_find_lsb = false,
|
||||||
.lower_uadd_carry = true, // TODO
|
.lower_uadd_carry = true, // TODO
|
||||||
|
Reference in New Issue
Block a user