rusticl/kernel: run some more opt passes

Signed-off-by: Karol Herbst <kherbst@redhat.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15439>
This commit is contained in:
Karol Herbst
2022-04-29 03:20:36 +02:00
committed by Marge Bot
parent 2522fbf32d
commit d747323a20

View File

@@ -168,15 +168,73 @@ where
res
}
// mostly like clc_spirv_to_dxil
// does not DCEe uniforms or images!
fn opt_nir(nir: &mut NirShader, dev: &Device) {
let nir_options = unsafe {
&*dev
.screen
.nir_shader_compiler_options(pipe_shader_type::PIPE_SHADER_COMPUTE)
};
while {
let mut progress = false;
progress |= nir.pass0(nir_copy_prop);
progress |= nir.pass0(nir_opt_copy_prop_vars);
progress |= nir.pass0(nir_opt_dead_write_vars);
if nir_options.lower_to_scalar {
nir.pass2(
nir_lower_alu_to_scalar,
nir_options.lower_to_scalar_filter,
ptr::null(),
);
nir.pass1(nir_lower_phis_to_scalar, false);
}
progress |= nir.pass0(nir_opt_deref);
progress |= nir.pass0(nir_opt_memcpy);
progress |= nir.pass0(nir_opt_dce);
progress |= nir.pass0(nir_opt_undef);
progress |= nir.pass0(nir_opt_constant_folding);
progress |= nir.pass0(nir_opt_cse);
nir.pass0(nir_split_var_copies);
progress |= nir.pass0(nir_lower_var_copies);
progress |= nir.pass0(nir_lower_vars_to_ssa);
nir.pass0(nir_lower_alu);
nir.pass0(nir_lower_pack);
progress |= nir.pass0(nir_opt_phi_precision);
progress |= nir.pass0(nir_opt_algebraic);
progress |= nir.pass1(
nir_opt_if,
nir_opt_if_options::nir_opt_if_aggressive_last_continue
| nir_opt_if_options::nir_opt_if_optimize_phi_true_false,
);
progress |= nir.pass0(nir_opt_dead_cf);
progress |= nir.pass0(nir_opt_remove_phis);
// we don't want to be too aggressive here, but it kills a bit of CFG
progress |= nir.pass3(nir_opt_peephole_select, 8, true, true);
progress |= nir.pass1(
nir_lower_vec3_to_vec4,
nir_variable_mode::nir_var_mem_generic | nir_variable_mode::nir_var_uniform,
);
if nir_options.max_unroll_iterations != 0 {
progress |= nir.pass0(nir_opt_loop_unroll);
}
nir.sweep_mem();
progress
} {}
}
fn lower_and_optimize_nir_pre_inputs(dev: &Device, nir: &mut NirShader, lib_clc: &NirShader) {
nir.set_workgroup_size_variable_if_zero();
nir.structurize();
while {
let mut progress = false;
nir.pass0(nir_split_var_copies);
progress |= nir.pass0(nir_copy_prop);
progress |= nir.pass0(nir_opt_copy_prop_vars);
progress |= nir.pass0(nir_opt_dead_write_vars);
progress |= nir.pass0(nir_opt_deref);
progress |= nir.pass0(nir_opt_dce);
progress |= nir.pass0(nir_opt_undef);
@@ -190,40 +248,16 @@ fn lower_and_optimize_nir_pre_inputs(dev: &Device, nir: &mut NirShader, lib_clc:
nir.remove_non_entrypoints();
// that should free up tons of memory
nir.sweep_mem();
while {
let mut progress = false;
progress |= nir.pass0(nir_copy_prop);
progress |= nir.pass0(nir_opt_copy_prop_vars);
progress |= nir.pass0(nir_opt_deref);
progress |= nir.pass0(nir_opt_dce);
progress |= nir.pass0(nir_opt_undef);
progress |= nir.pass0(nir_opt_constant_folding);
progress |= nir.pass0(nir_opt_cse);
progress |= nir.pass0(nir_split_var_copies);
progress |= nir.pass0(nir_lower_var_copies);
progress |= nir.pass0(nir_lower_vars_to_ssa);
progress |= nir.pass0(nir_opt_algebraic);
progress |= nir.pass1(
nir_opt_if,
nir_opt_if_options::nir_opt_if_aggressive_last_continue
| nir_opt_if_options::nir_opt_if_optimize_phi_true_false,
);
progress |= nir.pass0(nir_opt_dead_cf);
progress |= nir.pass0(nir_opt_remove_phis);
// we don't want to be too aggressive here, but it kills a bit of CFG
progress |= nir.pass3(nir_opt_peephole_select, 1, true, true);
progress |= nir.pass1(
nir_lower_vec3_to_vec4,
nir_variable_mode::nir_var_mem_generic | nir_variable_mode::nir_var_uniform,
);
progress
} {}
// TODO variable initializers
// TODO lower memcpy
nir.pass0(nir_dedup_inline_samplers);
nir.pass2(
nir_lower_vars_to_explicit_types,
nir_variable_mode::nir_var_function_temp,
nir_variable_mode::nir_var_uniform
| nir_variable_mode::nir_var_function_temp
| nir_variable_mode::nir_var_shader_temp
| nir_variable_mode::nir_var_mem_shared
| nir_variable_mode::nir_var_mem_generic
| nir_variable_mode::nir_var_mem_global,
Some(glsl_get_cl_type_size_align),
);
@@ -232,13 +266,7 @@ fn lower_and_optimize_nir_pre_inputs(dev: &Device, nir: &mut NirShader, lib_clc:
printf_opts.max_buffer_size = dev.printf_buffer_size() as u32;
nir.pass1(nir_lower_printf, &printf_opts);
nir.pass0(nir_split_var_copies);
nir.pass0(nir_opt_copy_prop_vars);
nir.pass0(nir_lower_var_copies);
nir.pass0(nir_lower_vars_to_ssa);
nir.pass0(nir_lower_alu);
nir.pass0(nir_opt_dce);
nir.pass0(nir_opt_deref);
opt_nir(nir, dev);
}
extern "C" fn can_remove_var(var: *mut nir_variable, _: *mut c_void) -> bool {
@@ -316,7 +344,6 @@ fn lower_and_optimize_nir_late(
);
nir.extract_constant_initializers();
// TODO printf
// TODO 32 bit devices
// add vars for global offsets
res.push(InternalKernelArg {
@@ -382,26 +409,17 @@ fn lower_and_optimize_nir_late(
| nir_variable_mode::nir_var_uniform,
nir_address_format::nir_address_format_32bit_offset_as_64bit,
);
nir.pass0(nir_opt_deref);
nir.pass0(nir_lower_vars_to_ssa);
// TODO whatever clc is doing here
if nir_options.lower_to_scalar {
nir.pass2(
nir_lower_alu_to_scalar,
nir_options.lower_to_scalar_filter,
ptr::null(),
);
}
if nir_options.lower_int64_options.0 != 0 {
nir.pass0(nir_lower_int64);
}
nir.pass1(nir_lower_convert_alu_types, None);
nir.pass0(nir_opt_dce);
opt_nir(nir, dev);
dev.screen.finalize_nir(nir);
nir.pass0(nir_opt_dce);
nir.sweep_mem();
res
}