nir: unify lower_bitfield_extract with has_bfe
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Acked-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24662>
This commit is contained in:
@@ -98,6 +98,7 @@ get_nir_options_for_stage(struct radv_physical_device *device, gl_shader_stage s
|
||||
.lower_hadd = true,
|
||||
.lower_mul_32x16 = true,
|
||||
.lower_uclz = true,
|
||||
.has_bfe = true,
|
||||
.has_bfm = true,
|
||||
.has_bitfield_select = true,
|
||||
.has_fsub = true,
|
||||
|
@@ -194,7 +194,7 @@ const nir_shader_compiler_options v3dv_nir_options = {
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_bitfield_insert = true,
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
.lower_bitfield_extract = true,
|
||||
.lower_bitfield_reverse = true,
|
||||
.lower_bit_count = true,
|
||||
.lower_cs_local_id_to_index = true,
|
||||
|
@@ -3411,10 +3411,8 @@ typedef struct nir_shader_compiler_options {
|
||||
bool lower_fsqrt;
|
||||
bool lower_sincos;
|
||||
bool lower_fmod;
|
||||
/** Lowers ibitfield_extract/ubitfield_extract to ibfe/ubfe. */
|
||||
/** Lowers ibitfield_extract/ubitfield_extract. */
|
||||
bool lower_bitfield_extract;
|
||||
/** Lowers ibitfield_extract/ubitfield_extract to compares, shifts. */
|
||||
bool lower_bitfield_extract_to_shifts;
|
||||
/** Lowers bitfield_insert. */
|
||||
bool lower_bitfield_insert;
|
||||
/** Lowers bitfield_reverse to shifts. */
|
||||
@@ -3744,6 +3742,9 @@ typedef struct nir_shader_compiler_options {
|
||||
/** Backend supports bitz/bitnz. */
|
||||
bool has_bit_test;
|
||||
|
||||
/** Backend supports ubfe/ibfe. */
|
||||
bool has_bfe;
|
||||
|
||||
/** Backend supports bfm. */
|
||||
bool has_bfm;
|
||||
|
||||
|
@@ -1177,15 +1177,6 @@ ${pass_name}(nir_shader *shader)
|
||||
(void) options;
|
||||
(void) info;
|
||||
|
||||
/* This is not a great place for this, but it seems to be the best place
|
||||
* for it. Check that at most one kind of lowering is requested for
|
||||
* bitfield extract and bitfield insert. Otherwise the lowering can fight
|
||||
* with each other and optimizations.
|
||||
*/
|
||||
assert((int)options->lower_bitfield_extract +
|
||||
(int)options->lower_bitfield_extract_to_shifts <= 1);
|
||||
|
||||
|
||||
STATIC_ASSERT(${str(cache["next_index"])} == ARRAY_SIZE(${pass_name}_values));
|
||||
% for index, condition in enumerate(condition_list):
|
||||
condition_flags[${index}] = ${condition};
|
||||
|
@@ -1999,12 +1999,12 @@ optimizations.extend([
|
||||
(('ibitfield_extract', 'value', 'offset', 'bits'),
|
||||
('bcsel', ('ult', 31, 'bits'), 'value',
|
||||
('ibfe', 'value', 'offset', 'bits')),
|
||||
'options->lower_bitfield_extract'),
|
||||
'options->lower_bitfield_extract && options->has_bfe'),
|
||||
|
||||
(('ubitfield_extract', 'value', 'offset', 'bits'),
|
||||
('bcsel', ('ult', 31, 'bits'), 'value',
|
||||
('ubfe', 'value', 'offset', 'bits')),
|
||||
'options->lower_bitfield_extract'),
|
||||
'options->lower_bitfield_extract && options->has_bfe'),
|
||||
|
||||
# (src0 & src1) | (~src0 & src2). Constant fold if src2 is 0.
|
||||
(('bitfield_select', a, b, 0), ('iand', a, b)),
|
||||
@@ -2056,7 +2056,7 @@ optimizations.extend([
|
||||
('ishr',
|
||||
('ishl', 'value', ('isub', ('isub', 32, 'bits'), 'offset')),
|
||||
('isub', 32, 'bits'))),
|
||||
'options->lower_bitfield_extract_to_shifts'),
|
||||
'options->lower_bitfield_extract && !options->has_bfe'),
|
||||
|
||||
(('ubitfield_extract', 'value', 'offset', 'bits'),
|
||||
('iand',
|
||||
@@ -2064,7 +2064,7 @@ optimizations.extend([
|
||||
('bcsel', ('ieq', 'bits', 32),
|
||||
0xffffffff,
|
||||
('isub', ('ishl', 1, 'bits'), 1))),
|
||||
'options->lower_bitfield_extract_to_shifts'),
|
||||
'options->lower_bitfield_extract && !options->has_bfe'),
|
||||
|
||||
(('ifind_msb', 'value'),
|
||||
('ufind_msb', ('bcsel', ('ilt', 'value', 0), ('inot', 'value'), 'value')),
|
||||
|
@@ -96,7 +96,7 @@ static const nir_shader_compiler_options ir3_base_options = {
|
||||
.lower_insert_word = true,
|
||||
.lower_helper_invocation = true,
|
||||
.lower_bitfield_insert = true,
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
.lower_bitfield_extract = true,
|
||||
.lower_pack_half_2x16 = true,
|
||||
.lower_pack_snorm_4x8 = true,
|
||||
.lower_pack_snorm_2x16 = true,
|
||||
|
@@ -135,7 +135,7 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
|
||||
.lower_flrp64 = true,
|
||||
.lower_fsat = true,
|
||||
.lower_bitfield_insert = true,
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
.lower_bitfield_extract = true,
|
||||
.lower_fdph = true,
|
||||
.lower_ffma16 = true,
|
||||
.lower_ffma32 = true,
|
||||
|
@@ -589,7 +589,7 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
|
||||
.lower_flrp64 = true,
|
||||
.lower_fsat = true,
|
||||
.lower_bitfield_insert = true,
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
.lower_bitfield_extract = true,
|
||||
.lower_fdot = true,
|
||||
.lower_fdph = true,
|
||||
.lower_ffma16 = true,
|
||||
|
@@ -1339,6 +1339,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
|
||||
.lower_fmod = true,
|
||||
.lower_uadd_carry = true,
|
||||
.lower_usub_borrow = true,
|
||||
.lower_bitfield_extract = true,
|
||||
.lower_bitfield_insert = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
@@ -1381,7 +1382,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
|
||||
rscreen->nir_options.force_indirect_unrolling_sampler = true;
|
||||
|
||||
if (rscreen->info.gfx_level >= EVERGREEN) {
|
||||
rscreen->nir_options.lower_bitfield_extract = true;
|
||||
rscreen->nir_options.has_bfe = true;
|
||||
rscreen->nir_options.has_bfm = true;
|
||||
rscreen->nir_options.has_bitfield_select = true;
|
||||
}
|
||||
@@ -1390,7 +1391,6 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
|
||||
/* Pre-EG doesn't have these ALU ops */
|
||||
rscreen->nir_options.lower_bit_count = true;
|
||||
rscreen->nir_options.lower_bitfield_reverse = true;
|
||||
rscreen->nir_options.lower_bitfield_extract_to_shifts = true;
|
||||
}
|
||||
|
||||
if (rscreen->info.gfx_level < CAYMAN) {
|
||||
|
@@ -1323,6 +1323,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
|
||||
.has_sudot_4x8 = sscreen->info.has_accelerated_dot_product && sscreen->info.gfx_level >= GFX11,
|
||||
.has_udot_4x8 = sscreen->info.has_accelerated_dot_product,
|
||||
.has_dot_2x16 = sscreen->info.has_accelerated_dot_product && sscreen->info.gfx_level < GFX11,
|
||||
.has_bfe = true,
|
||||
.has_bfm = true,
|
||||
.has_bitfield_select = true,
|
||||
.optimize_sample_mask_in = true,
|
||||
|
@@ -700,7 +700,7 @@ static const nir_shader_compiler_options v3d_nir_options = {
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_bitfield_insert = true,
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
.lower_bitfield_extract = true,
|
||||
.lower_bitfield_reverse = true,
|
||||
.lower_bit_count = true,
|
||||
.lower_cs_local_id_to_index = true,
|
||||
|
@@ -40,6 +40,7 @@
|
||||
.lower_fisnormal = true, \
|
||||
.lower_isign = true, \
|
||||
.lower_ldexp = true, \
|
||||
.lower_bitfield_extract = true, \
|
||||
.lower_bitfield_insert = true, \
|
||||
.lower_device_index_to_zero = true, \
|
||||
.vectorize_io = true, \
|
||||
@@ -183,8 +184,7 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
|
||||
nir_options->lower_flrp32 = devinfo->ver < 6 || devinfo->ver >= 11;
|
||||
nir_options->lower_fpow = devinfo->ver >= 12;
|
||||
|
||||
nir_options->lower_bitfield_extract = devinfo->ver >= 7;
|
||||
nir_options->lower_bitfield_extract_to_shifts = devinfo->ver < 7;
|
||||
nir_options->has_bfe = devinfo->ver >= 7;
|
||||
nir_options->has_bfm = devinfo->ver >= 7;
|
||||
nir_options->has_bfi = devinfo->ver >= 7;
|
||||
|
||||
|
@@ -137,6 +137,7 @@ nir_options = {
|
||||
.lower_interpolate_at = true,
|
||||
.has_fsub = true,
|
||||
.has_isub = true,
|
||||
.has_bfe = true,
|
||||
.vertex_id_zero_based = true,
|
||||
.lower_base_vertex = true,
|
||||
.lower_helper_invocation = true,
|
||||
|
@@ -3385,8 +3385,7 @@ nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type)
|
||||
op.lower_fsqrt = false; // TODO: only before gm200
|
||||
op.lower_sincos = false;
|
||||
op.lower_fmod = true;
|
||||
op.lower_bitfield_extract = false;
|
||||
op.lower_bitfield_extract_to_shifts = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET);
|
||||
op.lower_bitfield_extract = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET);
|
||||
op.lower_bitfield_insert = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET);
|
||||
op.lower_bitfield_reverse = (chipset < NVISA_GF100_CHIPSET);
|
||||
op.lower_bit_count = (chipset < NVISA_GF100_CHIPSET);
|
||||
|
@@ -53,7 +53,7 @@ void bifrost_compile_shader_nir(nir_shader *nir,
|
||||
.lower_fsign = true, \
|
||||
\
|
||||
.lower_bitfield_insert = true, \
|
||||
.lower_bitfield_extract_to_shifts = true, \
|
||||
.lower_bitfield_extract = true, \
|
||||
.lower_insert_byte = true, \
|
||||
.lower_rotate = true, \
|
||||
\
|
||||
|
@@ -67,7 +67,7 @@ static const nir_shader_compiler_options midgard_nir_options = {
|
||||
.lower_bit_count = true,
|
||||
.lower_bitfield_reverse = true,
|
||||
.lower_bitfield_insert = true,
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
.lower_bitfield_extract = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
|
Reference in New Issue
Block a user