diff --git a/src/gallium/frontends/rusticl/core/kernel.rs b/src/gallium/frontends/rusticl/core/kernel.rs index 79b9ef2e2f5..6c0159b07e9 100644 --- a/src/gallium/frontends/rusticl/core/kernel.rs +++ b/src/gallium/frontends/rusticl/core/kernel.rs @@ -168,15 +168,73 @@ where res } -// mostly like clc_spirv_to_dxil -// does not DCEe uniforms or images! +fn opt_nir(nir: &mut NirShader, dev: &Device) { + let nir_options = unsafe { + &*dev + .screen + .nir_shader_compiler_options(pipe_shader_type::PIPE_SHADER_COMPUTE) + }; + + while { + let mut progress = false; + + progress |= nir.pass0(nir_copy_prop); + progress |= nir.pass0(nir_opt_copy_prop_vars); + progress |= nir.pass0(nir_opt_dead_write_vars); + + if nir_options.lower_to_scalar { + nir.pass2( + nir_lower_alu_to_scalar, + nir_options.lower_to_scalar_filter, + ptr::null(), + ); + nir.pass1(nir_lower_phis_to_scalar, false); + } + + progress |= nir.pass0(nir_opt_deref); + progress |= nir.pass0(nir_opt_memcpy); + progress |= nir.pass0(nir_opt_dce); + progress |= nir.pass0(nir_opt_undef); + progress |= nir.pass0(nir_opt_constant_folding); + progress |= nir.pass0(nir_opt_cse); + nir.pass0(nir_split_var_copies); + progress |= nir.pass0(nir_lower_var_copies); + progress |= nir.pass0(nir_lower_vars_to_ssa); + nir.pass0(nir_lower_alu); + nir.pass0(nir_lower_pack); + progress |= nir.pass0(nir_opt_phi_precision); + progress |= nir.pass0(nir_opt_algebraic); + progress |= nir.pass1( + nir_opt_if, + nir_opt_if_options::nir_opt_if_aggressive_last_continue + | nir_opt_if_options::nir_opt_if_optimize_phi_true_false, + ); + progress |= nir.pass0(nir_opt_dead_cf); + progress |= nir.pass0(nir_opt_remove_phis); + // we don't want to be too aggressive here, but it kills a bit of CFG + progress |= nir.pass3(nir_opt_peephole_select, 8, true, true); + progress |= nir.pass1( + nir_lower_vec3_to_vec4, + nir_variable_mode::nir_var_mem_generic | nir_variable_mode::nir_var_uniform, + ); + + if nir_options.max_unroll_iterations != 0 { + progress |= nir.pass0(nir_opt_loop_unroll); + } + nir.sweep_mem(); + progress + } {} +} + fn lower_and_optimize_nir_pre_inputs(dev: &Device, nir: &mut NirShader, lib_clc: &NirShader) { nir.set_workgroup_size_variable_if_zero(); nir.structurize(); while { let mut progress = false; + nir.pass0(nir_split_var_copies); progress |= nir.pass0(nir_copy_prop); progress |= nir.pass0(nir_opt_copy_prop_vars); + progress |= nir.pass0(nir_opt_dead_write_vars); progress |= nir.pass0(nir_opt_deref); progress |= nir.pass0(nir_opt_dce); progress |= nir.pass0(nir_opt_undef); @@ -190,40 +248,16 @@ fn lower_and_optimize_nir_pre_inputs(dev: &Device, nir: &mut NirShader, lib_clc: nir.remove_non_entrypoints(); // that should free up tons of memory nir.sweep_mem(); - while { - let mut progress = false; - progress |= nir.pass0(nir_copy_prop); - progress |= nir.pass0(nir_opt_copy_prop_vars); - progress |= nir.pass0(nir_opt_deref); - progress |= nir.pass0(nir_opt_dce); - progress |= nir.pass0(nir_opt_undef); - progress |= nir.pass0(nir_opt_constant_folding); - progress |= nir.pass0(nir_opt_cse); - progress |= nir.pass0(nir_split_var_copies); - progress |= nir.pass0(nir_lower_var_copies); - progress |= nir.pass0(nir_lower_vars_to_ssa); - progress |= nir.pass0(nir_opt_algebraic); - progress |= nir.pass1( - nir_opt_if, - nir_opt_if_options::nir_opt_if_aggressive_last_continue - | nir_opt_if_options::nir_opt_if_optimize_phi_true_false, - ); - progress |= nir.pass0(nir_opt_dead_cf); - progress |= nir.pass0(nir_opt_remove_phis); - // we don't want to be too aggressive here, but it kills a bit of CFG - progress |= nir.pass3(nir_opt_peephole_select, 1, true, true); - progress |= nir.pass1( - nir_lower_vec3_to_vec4, - nir_variable_mode::nir_var_mem_generic | nir_variable_mode::nir_var_uniform, - ); - progress - } {} - // TODO variable initializers - // TODO lower memcpy + nir.pass0(nir_dedup_inline_samplers); nir.pass2( nir_lower_vars_to_explicit_types, - nir_variable_mode::nir_var_function_temp, + nir_variable_mode::nir_var_uniform + | nir_variable_mode::nir_var_function_temp + | nir_variable_mode::nir_var_shader_temp + | nir_variable_mode::nir_var_mem_shared + | nir_variable_mode::nir_var_mem_generic + | nir_variable_mode::nir_var_mem_global, Some(glsl_get_cl_type_size_align), ); @@ -232,13 +266,7 @@ fn lower_and_optimize_nir_pre_inputs(dev: &Device, nir: &mut NirShader, lib_clc: printf_opts.max_buffer_size = dev.printf_buffer_size() as u32; nir.pass1(nir_lower_printf, &printf_opts); - nir.pass0(nir_split_var_copies); - nir.pass0(nir_opt_copy_prop_vars); - nir.pass0(nir_lower_var_copies); - nir.pass0(nir_lower_vars_to_ssa); - nir.pass0(nir_lower_alu); - nir.pass0(nir_opt_dce); - nir.pass0(nir_opt_deref); + opt_nir(nir, dev); } extern "C" fn can_remove_var(var: *mut nir_variable, _: *mut c_void) -> bool { @@ -316,7 +344,6 @@ fn lower_and_optimize_nir_late( ); nir.extract_constant_initializers(); - // TODO printf // TODO 32 bit devices // add vars for global offsets res.push(InternalKernelArg { @@ -382,26 +409,17 @@ fn lower_and_optimize_nir_late( | nir_variable_mode::nir_var_uniform, nir_address_format::nir_address_format_32bit_offset_as_64bit, ); - nir.pass0(nir_opt_deref); - nir.pass0(nir_lower_vars_to_ssa); - - // TODO whatever clc is doing here - - if nir_options.lower_to_scalar { - nir.pass2( - nir_lower_alu_to_scalar, - nir_options.lower_to_scalar_filter, - ptr::null(), - ); - } if nir_options.lower_int64_options.0 != 0 { nir.pass0(nir_lower_int64); } nir.pass1(nir_lower_convert_alu_types, None); - nir.pass0(nir_opt_dce); + + opt_nir(nir, dev); dev.screen.finalize_nir(nir); + + nir.pass0(nir_opt_dce); nir.sweep_mem(); res }