From fe35a8b00e7b82688e4eaf2f3fe62a80af342638 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 25 Mar 2024 16:41:41 -0400 Subject: [PATCH] nir: change "user_data_amd" sysval from 4 to 8 components so that we can pass more fast constants to compute shaders (without reading memory in the shader). Reviewed-by: Georg Lehmann Part-of: --- src/amd/common/ac_shader_args.c | 1 + src/compiler/nir/nir_intrinsics.py | 2 +- src/compiler/shader_info.h | 2 +- src/gallium/drivers/radeonsi/si_nir_lower_abi.c | 2 +- src/gallium/drivers/radeonsi/si_pipe.h | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/amd/common/ac_shader_args.c b/src/amd/common/ac_shader_args.c index 9c86b80cfa8..6eee1e92a25 100644 --- a/src/amd/common/ac_shader_args.c +++ b/src/amd/common/ac_shader_args.c @@ -12,6 +12,7 @@ void ac_add_arg(struct ac_shader_args *info, enum ac_arg_regfile regfile, unsign enum ac_arg_type type, struct ac_arg *arg) { assert(info->arg_count < AC_MAX_ARGS); + assert(nir_num_components_valid(size)); unsigned offset; if (regfile == AC_ARG_SGPR) { diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 2ea2e740a2f..3247119072c 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1012,7 +1012,7 @@ system_value("color0", 4) system_value("color1", 4) # System value for internal compute shaders in radeonsi. -system_value("user_data_amd", 4) +system_value("user_data_amd", 8) # In a fragment shader, the current sample mask. At the beginning of the shader, # this is the same as load_sample_mask_in, but as the shader is executed, it may diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h index 5aae229197e..92d65f92133 100644 --- a/src/compiler/shader_info.h +++ b/src/compiler/shader_info.h @@ -532,7 +532,7 @@ typedef struct shader_info { struct { uint16_t workgroup_size_hint[3]; - uint8_t user_data_components_amd:3; + uint8_t user_data_components_amd:4; /* * Arrangement of invocations used to calculate derivatives in a compute diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c index 537056c043f..0feb201b255 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c @@ -746,7 +746,7 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s } case nir_intrinsic_load_user_data_amd: replacement = ac_nir_load_arg(b, &args->ac, args->cs_user_data); - replacement = nir_pad_vec4(b, replacement); + replacement = nir_pad_vector(b, replacement, 8); break; default: return false; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 8a9d085572d..d603ba26839 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1159,7 +1159,7 @@ struct si_context { unsigned border_color_count; unsigned num_vs_blit_sgprs; uint32_t vs_blit_sh_data[MAX_SI_VS_BLIT_SGPRS]; - uint32_t cs_user_data[4]; + uint32_t cs_user_data[8]; /* Vertex buffers. */ bool vertex_buffers_dirty;