intel/fs: fix subgroup invocation read bounds checking

nir->info.subgroup_size is not always a literal size; it can hold one of
these enum values:
  SUBGROUP_SIZE_VARYING = 0
  SUBGROUP_SIZE_UNIFORM = 1
  SUBGROUP_SIZE_API_CONSTANT = 2
  SUBGROUP_SIZE_FULL_SUBGROUPS = 3

Comparing the dispatch width directly against that field is therefore
wrong. Instead, compute the actual API subgroup size and compare it to the
dispatch size to determine whether bounds checking is needed.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: 9ac192d79d ("intel/fs: bound subgroup invocation read to dispatch size")
Reviewed-by: Marcin Ślusarz <marcin.slusarz@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21856>
This commit is contained in:
Lionel Landwerlin
2023-03-10 22:57:36 +02:00
committed by Marge Bot
parent f6a36190a1
commit 56474fae93
5 changed files with 26 additions and 1 deletions

View File

@@ -535,6 +535,9 @@ public:
const unsigned dispatch_width; /**< 8, 16 or 32 */ const unsigned dispatch_width; /**< 8, 16 or 32 */
unsigned max_dispatch_width; unsigned max_dispatch_width;
/* The API selected subgroup size */
unsigned api_subgroup_size; /**< 0, 8, 16, 32 */
struct shader_stats shader_stats; struct shader_stats shader_stats;
brw::fs_builder bld; brw::fs_builder bld;

View File

@@ -5326,7 +5326,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
* FS), bound the invocation to the dispatch size. * FS), bound the invocation to the dispatch size.
*/ */
fs_reg bound_invocation; fs_reg bound_invocation;
if (bld.dispatch_width() < bld.shader->nir->info.subgroup_size) { if (api_subgroup_size == 0 ||
bld.dispatch_width() < api_subgroup_size) {
bound_invocation = bld.vgrf(BRW_REGISTER_TYPE_UD); bound_invocation = bld.vgrf(BRW_REGISTER_TYPE_UD);
bld.AND(bound_invocation, invocation, brw_imm_ud(dispatch_width - 1)); bld.AND(bound_invocation, invocation, brw_imm_ud(dispatch_width - 1));
} else { } else {

View File

@@ -29,6 +29,7 @@
*/ */
#include "brw_eu.h" #include "brw_eu.h"
#include "brw_fs.h" #include "brw_fs.h"
#include "brw_nir.h"
#include "compiler/glsl_types.h" #include "compiler/glsl_types.h"
using namespace brw; using namespace brw;
@@ -1362,9 +1363,14 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
performance_analysis(this), performance_analysis(this),
needs_register_pressure(needs_register_pressure), needs_register_pressure(needs_register_pressure),
dispatch_width(dispatch_width), dispatch_width(dispatch_width),
api_subgroup_size(brw_nir_api_subgroup_size(shader, dispatch_width)),
bld(fs_builder(this, dispatch_width).at_end()) bld(fs_builder(this, dispatch_width).at_end())
{ {
init(); init();
assert(api_subgroup_size == 0 ||
api_subgroup_size == 8 ||
api_subgroup_size == 16 ||
api_subgroup_size == 32);
} }
fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
@@ -1382,9 +1388,14 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
performance_analysis(this), performance_analysis(this),
needs_register_pressure(needs_register_pressure), needs_register_pressure(needs_register_pressure),
dispatch_width(8), dispatch_width(8),
api_subgroup_size(brw_nir_api_subgroup_size(shader, dispatch_width)),
bld(fs_builder(this, dispatch_width).at_end()) bld(fs_builder(this, dispatch_width).at_end())
{ {
init(); init();
assert(api_subgroup_size == 0 ||
api_subgroup_size == 8 ||
api_subgroup_size == 16 ||
api_subgroup_size == 32);
} }
void void

View File

@@ -1689,6 +1689,13 @@ get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
unreachable("Invalid subgroup size type"); unreachable("Invalid subgroup size type");
} }
/* Resolve the subgroup size that the API selected for this shader.
 *
 * This is a public entry point over get_subgroup_size(): it forwards the
 * shader's info block together with hw_subgroup_size, which that helper
 * receives as its max_subgroup_size argument.
 *
 * NOTE(review): the fs_visitor constructors assert that the result is one of
 * 0, 8, 16 or 32; the meaning of 0 is decided inside get_subgroup_size().
 */
unsigned
brw_nir_api_subgroup_size(const nir_shader *nir, unsigned hw_subgroup_size)
{
   const unsigned api_size = get_subgroup_size(&nir->info, hw_subgroup_size);

   return api_size;
}
void void
brw_nir_apply_key(nir_shader *nir, brw_nir_apply_key(nir_shader *nir,
const struct brw_compiler *compiler, const struct brw_compiler *compiler,

View File

@@ -160,6 +160,9 @@ void brw_nir_apply_key(nir_shader *nir,
unsigned max_subgroup_size, unsigned max_subgroup_size,
bool is_scalar); bool is_scalar);
/** Returns the API-selected subgroup size for \p nir, resolved against
 * \p hw_subgroup_size (the hardware subgroup/dispatch size being compiled).
 */
unsigned brw_nir_api_subgroup_size(const nir_shader *nir,
unsigned hw_subgroup_size);
enum brw_conditional_mod brw_cmod_for_nir_comparison(nir_op op); enum brw_conditional_mod brw_cmod_for_nir_comparison(nir_op op);
enum lsc_opcode lsc_aop_for_nir_intrinsic(const nir_intrinsic_instr *atomic); enum lsc_opcode lsc_aop_for_nir_intrinsic(const nir_intrinsic_instr *atomic);
enum brw_reg_type brw_type_for_nir_type(const struct intel_device_info *devinfo, enum brw_reg_type brw_type_for_nir_type(const struct intel_device_info *devinfo,