rusticl/kernel: run some more opt passes

Signed-off-by: Karol Herbst <kherbst@redhat.com> Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15439>
2022-04-29 03:20:36 +02:00
parent 2522fbf32d
commit d747323a20
1 changed files with 72 additions and 54 deletions
--- a/src/gallium/frontends/rusticl/core/kernel.rs
+++ b/src/gallium/frontends/rusticl/core/kernel.rs
@@ -168,15 +168,73 @@ where
    res
 }

-// mostly like clc_spirv_to_dxil
-// does not DCEe uniforms or images!
+fn opt_nir(nir: &mut NirShader, dev: &Device) {
+    let nir_options = unsafe {
+        &*dev
+            .screen
+            .nir_shader_compiler_options(pipe_shader_type::PIPE_SHADER_COMPUTE)
+    };
+
+    while {
+        let mut progress = false;
+
+        progress |= nir.pass0(nir_copy_prop);
+        progress |= nir.pass0(nir_opt_copy_prop_vars);
+        progress |= nir.pass0(nir_opt_dead_write_vars);
+
+        if nir_options.lower_to_scalar {
+            nir.pass2(
+                nir_lower_alu_to_scalar,
+                nir_options.lower_to_scalar_filter,
+                ptr::null(),
+            );
+            nir.pass1(nir_lower_phis_to_scalar, false);
+        }
+
+        progress |= nir.pass0(nir_opt_deref);
+        progress |= nir.pass0(nir_opt_memcpy);
+        progress |= nir.pass0(nir_opt_dce);
+        progress |= nir.pass0(nir_opt_undef);
+        progress |= nir.pass0(nir_opt_constant_folding);
+        progress |= nir.pass0(nir_opt_cse);
+        nir.pass0(nir_split_var_copies);
+        progress |= nir.pass0(nir_lower_var_copies);
+        progress |= nir.pass0(nir_lower_vars_to_ssa);
+        nir.pass0(nir_lower_alu);
+        nir.pass0(nir_lower_pack);
+        progress |= nir.pass0(nir_opt_phi_precision);
+        progress |= nir.pass0(nir_opt_algebraic);
+        progress |= nir.pass1(
+            nir_opt_if,
+            nir_opt_if_options::nir_opt_if_aggressive_last_continue
+                | nir_opt_if_options::nir_opt_if_optimize_phi_true_false,
+        );
+        progress |= nir.pass0(nir_opt_dead_cf);
+        progress |= nir.pass0(nir_opt_remove_phis);
+        // we don't want to be too aggressive here, but it kills a bit of CFG
+        progress |= nir.pass3(nir_opt_peephole_select, 8, true, true);
+        progress |= nir.pass1(
+            nir_lower_vec3_to_vec4,
+            nir_variable_mode::nir_var_mem_generic | nir_variable_mode::nir_var_uniform,
+        );
+
+        if nir_options.max_unroll_iterations != 0 {
+            progress |= nir.pass0(nir_opt_loop_unroll);
+        }
+        nir.sweep_mem();
+        progress
+    } {}
+}
+
 fn lower_and_optimize_nir_pre_inputs(dev: &Device, nir: &mut NirShader, lib_clc: &NirShader) {
    nir.set_workgroup_size_variable_if_zero();
    nir.structurize();
    while {
        let mut progress = false;
+        nir.pass0(nir_split_var_copies);
        progress |= nir.pass0(nir_copy_prop);
        progress |= nir.pass0(nir_opt_copy_prop_vars);
+        progress |= nir.pass0(nir_opt_dead_write_vars);
        progress |= nir.pass0(nir_opt_deref);
        progress |= nir.pass0(nir_opt_dce);
        progress |= nir.pass0(nir_opt_undef);
@@ -190,40 +248,16 @@ fn lower_and_optimize_nir_pre_inputs(dev: &Device, nir: &mut NirShader, lib_clc:
    nir.remove_non_entrypoints();
    // that should free up tons of memory
    nir.sweep_mem();
-    while {
-        let mut progress = false;
-        progress |= nir.pass0(nir_copy_prop);
-        progress |= nir.pass0(nir_opt_copy_prop_vars);
-        progress |= nir.pass0(nir_opt_deref);
-        progress |= nir.pass0(nir_opt_dce);
-        progress |= nir.pass0(nir_opt_undef);
-        progress |= nir.pass0(nir_opt_constant_folding);
-        progress |= nir.pass0(nir_opt_cse);
-        progress |= nir.pass0(nir_split_var_copies);
-        progress |= nir.pass0(nir_lower_var_copies);
-        progress |= nir.pass0(nir_lower_vars_to_ssa);
-        progress |= nir.pass0(nir_opt_algebraic);
-        progress |= nir.pass1(
-            nir_opt_if,
-            nir_opt_if_options::nir_opt_if_aggressive_last_continue
-                | nir_opt_if_options::nir_opt_if_optimize_phi_true_false,
-        );
-        progress |= nir.pass0(nir_opt_dead_cf);
-        progress |= nir.pass0(nir_opt_remove_phis);
-        // we don't want to be too aggressive here, but it kills a bit of CFG
-        progress |= nir.pass3(nir_opt_peephole_select, 1, true, true);
-        progress |= nir.pass1(
-            nir_lower_vec3_to_vec4,
-            nir_variable_mode::nir_var_mem_generic | nir_variable_mode::nir_var_uniform,
-        );
-        progress
-    } {}
-    // TODO variable initializers
-    // TODO lower memcpy
+
    nir.pass0(nir_dedup_inline_samplers);
    nir.pass2(
        nir_lower_vars_to_explicit_types,
-        nir_variable_mode::nir_var_function_temp,
+        nir_variable_mode::nir_var_uniform
+            | nir_variable_mode::nir_var_function_temp
+            | nir_variable_mode::nir_var_shader_temp
+            | nir_variable_mode::nir_var_mem_shared
+            | nir_variable_mode::nir_var_mem_generic
+            | nir_variable_mode::nir_var_mem_global,
        Some(glsl_get_cl_type_size_align),
    );

@@ -232,13 +266,7 @@ fn lower_and_optimize_nir_pre_inputs(dev: &Device, nir: &mut NirShader, lib_clc:
    printf_opts.max_buffer_size = dev.printf_buffer_size() as u32;
    nir.pass1(nir_lower_printf, &printf_opts);

-    nir.pass0(nir_split_var_copies);
-    nir.pass0(nir_opt_copy_prop_vars);
-    nir.pass0(nir_lower_var_copies);
-    nir.pass0(nir_lower_vars_to_ssa);
-    nir.pass0(nir_lower_alu);
-    nir.pass0(nir_opt_dce);
-    nir.pass0(nir_opt_deref);
+    opt_nir(nir, dev);
 }

 extern "C" fn can_remove_var(var: *mut nir_variable, _: *mut c_void) -> bool {
@@ -316,7 +344,6 @@ fn lower_and_optimize_nir_late(
    );
    nir.extract_constant_initializers();

-    // TODO printf
    // TODO 32 bit devices
    // add vars for global offsets
    res.push(InternalKernelArg {
@@ -382,26 +409,17 @@ fn lower_and_optimize_nir_late(
            | nir_variable_mode::nir_var_uniform,
        nir_address_format::nir_address_format_32bit_offset_as_64bit,
    );
-    nir.pass0(nir_opt_deref);
-    nir.pass0(nir_lower_vars_to_ssa);
-
-    // TODO whatever clc is doing here
-
-    if nir_options.lower_to_scalar {
-        nir.pass2(
-            nir_lower_alu_to_scalar,
-            nir_options.lower_to_scalar_filter,
-            ptr::null(),
-        );
-    }

    if nir_options.lower_int64_options.0 != 0 {
        nir.pass0(nir_lower_int64);
    }

    nir.pass1(nir_lower_convert_alu_types, None);
-    nir.pass0(nir_opt_dce);
+
+    opt_nir(nir, dev);
    dev.screen.finalize_nir(nir);
+
+    nir.pass0(nir_opt_dce);
    nir.sweep_mem();
    res
 }