diff --git a/src/amd/vulkan/radv_meta_buffer.c b/src/amd/vulkan/radv_meta_buffer.c index 28343ebd83a..e27cae1e61b 100644 --- a/src/amd/vulkan/radv_meta_buffer.c +++ b/src/amd/vulkan/radv_meta_buffer.c @@ -16,7 +16,7 @@ build_buffer_fill_shader(struct radv_device *dev) b.shader->info.cs.local_size[2] = 1; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -71,7 +71,7 @@ build_buffer_copy_shader(struct radv_device *dev) b.shader->info.cs.local_size[2] = 1; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], diff --git a/src/amd/vulkan/radv_meta_bufimage.c b/src/amd/vulkan/radv_meta_bufimage.c index 2354ebd90f4..913d14de4a1 100644 --- a/src/amd/vulkan/radv_meta_bufimage.c +++ b/src/amd/vulkan/radv_meta_bufimage.c @@ -60,7 +60,7 @@ build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d) output_img->data.binding = 1; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -289,7 +289,7 @@ build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d) output_img->data.binding = 1; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -511,7 +511,7 @@ 
build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev) output_img->data.binding = 1; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -719,7 +719,7 @@ build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d) output_img->data.binding = 1; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -936,7 +936,7 @@ build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev) output_img->data.binding = 1; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -1143,7 +1143,7 @@ build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d) output_img->data.binding = 0; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -1335,7 +1335,7 @@ build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev) output_img->data.binding = 0; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], diff --git a/src/amd/vulkan/radv_meta_clear.c 
b/src/amd/vulkan/radv_meta_clear.c index 4d4dd109165..bfeda8e3fa9 100644 --- a/src/amd/vulkan/radv_meta_clear.c +++ b/src/amd/vulkan/radv_meta_clear.c @@ -1175,7 +1175,7 @@ build_clear_htile_mask_shader() b.shader->info.cs.local_size[2] = 1; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c index bf9ed047c35..f4596888a12 100644 --- a/src/amd/vulkan/radv_meta_fast_clear.c +++ b/src/amd/vulkan/radv_meta_fast_clear.c @@ -58,7 +58,7 @@ build_dcc_decompress_compute_shader(struct radv_device *dev) output_img->data.binding = 1; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], diff --git a/src/amd/vulkan/radv_meta_fmask_expand.c b/src/amd/vulkan/radv_meta_fmask_expand.c index 8f229a8188b..3a109110c95 100644 --- a/src/amd/vulkan/radv_meta_fmask_expand.c +++ b/src/amd/vulkan/radv_meta_fmask_expand.c @@ -58,7 +58,7 @@ build_fmask_expand_compute_shader(struct radv_device *device, int samples) output_img->data.access = ACCESS_NON_READABLE; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], diff --git a/src/amd/vulkan/radv_meta_resolve_cs.c b/src/amd/vulkan/radv_meta_resolve_cs.c index b57bce8ef7a..e1e30777062 100644 --- a/src/amd/vulkan/radv_meta_resolve_cs.c +++ b/src/amd/vulkan/radv_meta_resolve_cs.c @@ -93,7 +93,7 @@ 
build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_s output_img->data.descriptor_set = 0; output_img->data.binding = 1; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -195,7 +195,7 @@ build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples, output_img->data.descriptor_set = 0; output_img->data.binding = 1; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index feeb5d84512..66a9fe94b90 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -203,7 +203,7 @@ build_occlusion_query_shader(struct radv_device *device) { nir_builder_instr_insert(&b, &src_buf->instr); nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -395,7 +395,7 @@ build_pipeline_statistics_query_shader(struct radv_device *device) { nir_builder_instr_insert(&b, &src_buf->instr); nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -635,7 +635,7 @@ build_tfb_query_shader(struct radv_device *device) /* Compute global ID. 
*/ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -837,7 +837,7 @@ build_timestamp_query_shader(struct radv_device *device) /* Compute global ID. */ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 40c39490585..693bd332a66 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -2121,6 +2121,8 @@ nir_intrinsic_from_system_value(gl_system_value val) return nir_intrinsic_load_local_group_size; case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: return nir_intrinsic_load_global_invocation_id; + case SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID: + return nir_intrinsic_load_base_global_invocation_id; case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX: return nir_intrinsic_load_global_invocation_index; case SYSTEM_VALUE_WORK_DIM: @@ -2220,6 +2222,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin) return SYSTEM_VALUE_LOCAL_GROUP_SIZE; case nir_intrinsic_load_global_invocation_id: return SYSTEM_VALUE_GLOBAL_INVOCATION_ID; + case nir_intrinsic_load_base_global_invocation_id: + return SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID; case nir_intrinsic_load_global_invocation_index: return SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX; case nir_intrinsic_load_work_dim: diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index da4f95c5ed3..80c61650f78 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -586,9 +586,13 @@ system_value("tess_level_inner_default", 2) system_value("patch_vertices_in", 1) 
system_value("local_invocation_id", 3) system_value("local_invocation_index", 1) -system_value("work_group_id", 3) +# The zero_base variant starts from 0 for the current dispatch; +# the non-zero_base variant has the base included. +system_value("work_group_id", 3, bit_sizes=[32, 64]) +system_value("work_group_id_zero_base", 3) +system_value("base_work_group_id", 3, bit_sizes=[32, 64]) system_value("user_clip_plane", 4, indices=[UCP_ID]) -system_value("num_work_groups", 3) +system_value("num_work_groups", 3, bit_sizes=[32, 64]) system_value("helper_invocation", 1, bit_sizes=[1, 32]) system_value("alpha_ref_float", 1) system_value("layer_id", 1) @@ -603,7 +607,13 @@ system_value("subgroup_lt_mask", 0, bit_sizes=[32, 64]) system_value("num_subgroups", 1) system_value("subgroup_id", 1) system_value("local_group_size", 3) +# Note: global_invocation_id_zero_base is defined as +# (work_group_id * local_group_size) + local_invocation_id. +# It is *not* based on work_group_id_zero_base, so the work group +# base is already accounted for, and the global base is added on top of that. system_value("global_invocation_id", 3, bit_sizes=[32, 64]) +system_value("global_invocation_id_zero_base", 3, bit_sizes=[32, 64]) +system_value("base_global_invocation_id", 3, bit_sizes=[32, 64]) system_value("global_invocation_index", 1, bit_sizes=[32, 64]) system_value("work_dim", 1) system_value("line_width", 1) diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c index 05cf5107d6e..92723a5bffb 100644 --- a/src/compiler/nir/nir_lower_system_values.c +++ b/src/compiler/nir/nir_lower_system_values.c @@ -44,9 +44,9 @@ static nir_ssa_def* build_global_group_size(nir_builder *b, unsigned bit_size) { nir_ssa_def *group_size = nir_load_local_group_size(b); - nir_ssa_def *num_work_groups = nir_load_num_work_groups(b); + nir_ssa_def *num_work_groups = nir_load_num_work_groups(b, bit_size); return nir_imul(b, nir_u2u(b, group_size, 
bit_size), - nir_u2u(b, num_work_groups, bit_size)); + num_work_groups); } static bool @@ -189,10 +189,10 @@ lower_system_value_instr(nir_builder *b, nir_instr *instr, void *_state) case nir_intrinsic_load_global_invocation_id: { nir_ssa_def *group_size = nir_load_local_group_size(b); - nir_ssa_def *group_id = nir_load_work_group_id(b); + nir_ssa_def *group_id = nir_load_work_group_id(b, bit_size); nir_ssa_def *local_id = nir_load_local_invocation_id(b); - return nir_iadd(b, nir_imul(b, nir_u2u(b, group_id, bit_size), + return nir_iadd(b, nir_imul(b, group_id, nir_u2u(b, group_size, bit_size)), nir_u2u(b, local_id, bit_size)); } @@ -222,10 +222,6 @@ lower_system_value_instr(nir_builder *b, nir_instr *instr, void *_state) return NULL; } - case nir_intrinsic_load_num_work_groups: - case nir_intrinsic_load_work_group_id: - return sanitize_32bit_sysval(b, intrin); - case nir_intrinsic_load_deref: { nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); if (deref->mode != nir_var_system_value) diff --git a/src/compiler/shader_enums.c b/src/compiler/shader_enums.c index a89b5138372..a91c44369e1 100644 --- a/src/compiler/shader_enums.c +++ b/src/compiler/shader_enums.c @@ -246,6 +246,7 @@ gl_system_value_name(gl_system_value sysval) ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_ID), ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX), ENUM(SYSTEM_VALUE_GLOBAL_INVOCATION_ID), + ENUM(SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID), ENUM(SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX), ENUM(SYSTEM_VALUE_WORK_GROUP_ID), ENUM(SYSTEM_VALUE_NUM_WORK_GROUPS), diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h index 4ef3512849f..e2b9cd6f02e 100644 --- a/src/compiler/shader_enums.h +++ b/src/compiler/shader_enums.h @@ -615,6 +615,7 @@ typedef enum SYSTEM_VALUE_LOCAL_INVOCATION_ID, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX, SYSTEM_VALUE_GLOBAL_INVOCATION_ID, + SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID, SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX, SYSTEM_VALUE_WORK_GROUP_ID, SYSTEM_VALUE_NUM_WORK_GROUPS, diff 
--git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 4d5d22f6b74..3035f5460a5 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -644,7 +644,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, break; case TGSI_SEMANTIC_BLOCK_ID: op = nir_intrinsic_load_work_group_id; - load = nir_load_work_group_id(b); + load = nir_load_work_group_id(b, 32); break; case TGSI_SEMANTIC_BLOCK_SIZE: op = nir_intrinsic_load_local_group_size;