rusticl/kernel: skip adding global id offsets if not used
This allows us to shrink the kernel input buffer quite significantly, as the offset is a vector-aligned size_t3 value.

Signed-off-by: Karol Herbst <kherbst@redhat.com>
Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25303>
This commit is contained in:
@@ -518,21 +518,28 @@ fn lower_and_optimize_nir(
|
|||||||
);
|
);
|
||||||
nir.extract_constant_initializers();
|
nir.extract_constant_initializers();
|
||||||
|
|
||||||
// TODO 32 bit devices
|
// run before gather info
|
||||||
// add vars for global offsets
|
nir_pass!(nir, nir_lower_system_values);
|
||||||
internal_args.push(InternalKernelArg {
|
let mut compute_options = nir_lower_compute_system_values_options::default();
|
||||||
kind: InternalKernelArgType::GlobalWorkOffsets,
|
compute_options.set_has_base_global_invocation_id(true);
|
||||||
offset: 0,
|
nir_pass!(nir, nir_lower_compute_system_values, &compute_options);
|
||||||
size: (3 * dev.address_bits() / 8) as usize,
|
nir.gather_info();
|
||||||
});
|
|
||||||
|
if nir.reads_sysval(gl_system_value::SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID) {
|
||||||
|
internal_args.push(InternalKernelArg {
|
||||||
|
kind: InternalKernelArgType::GlobalWorkOffsets,
|
||||||
|
offset: 0,
|
||||||
|
size: (3 * dev.address_bits() / 8) as usize,
|
||||||
|
});
|
||||||
|
lower_state.base_global_invoc_id_loc = args.len() + internal_args.len() - 1;
|
||||||
|
nir.add_var(
|
||||||
|
nir_variable_mode::nir_var_uniform,
|
||||||
|
unsafe { glsl_vector_type(address_bits_base_type, 3) },
|
||||||
|
lower_state.base_global_invoc_id_loc,
|
||||||
|
"base_global_invocation_id",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
lower_state.base_global_invoc_id_loc = args.len() + internal_args.len() - 1;
|
|
||||||
nir.add_var(
|
|
||||||
nir_variable_mode::nir_var_uniform,
|
|
||||||
unsafe { glsl_vector_type(address_bits_base_type, 3) },
|
|
||||||
lower_state.base_global_invoc_id_loc,
|
|
||||||
"base_global_invocation_id",
|
|
||||||
);
|
|
||||||
if nir.has_constant() {
|
if nir.has_constant() {
|
||||||
internal_args.push(InternalKernelArg {
|
internal_args.push(InternalKernelArg {
|
||||||
kind: InternalKernelArgType::ConstantBuffer,
|
kind: InternalKernelArgType::ConstantBuffer,
|
||||||
@@ -562,12 +569,6 @@ fn lower_and_optimize_nir(
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// run before gather info
|
|
||||||
nir_pass!(nir, nir_lower_system_values);
|
|
||||||
let mut compute_options = nir_lower_compute_system_values_options::default();
|
|
||||||
compute_options.set_has_base_global_invocation_id(true);
|
|
||||||
nir_pass!(nir, nir_lower_compute_system_values, &compute_options);
|
|
||||||
nir.gather_info();
|
|
||||||
if nir.num_images() > 0 || nir.num_textures() > 0 {
|
if nir.num_images() > 0 || nir.num_textures() > 0 {
|
||||||
let count = nir.num_images() + nir.num_textures();
|
let count = nir.num_images() + nir.num_textures();
|
||||||
internal_args.push(InternalKernelArg {
|
internal_args.push(InternalKernelArg {
|
||||||
|
Reference in New Issue
Block a user