radv/gfx10: fix required ballot size with VK_EXT_subgroup_size_control
If compute shaders require a specific subgroup size (ie. Wave32),
we have to use the correct ballot size.
Fixes dEQP-VK.subgroups.ballot_other.compute.*_requiredsubgroupSize.
Fixes: fb07fd4e6c
("radv: implement VK_EXT_subgroup_size_control")
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4215>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4215>
This commit is contained in:

committed by
Marge Bot

parent
672d106199
commit
c923de68dd
@@ -3925,7 +3925,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
|
|||||||
|
|
||||||
ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class,
|
ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class,
|
||||||
args->options->family, float_mode,
|
args->options->family, float_mode,
|
||||||
args->shader_info->wave_size, 64);
|
args->shader_info->wave_size,
|
||||||
|
args->shader_info->ballot_bit_size);
|
||||||
ctx.context = ctx.ac.context;
|
ctx.context = ctx.ac.context;
|
||||||
|
|
||||||
ctx.max_workgroup_size = 0;
|
ctx.max_workgroup_size = 0;
|
||||||
|
@@ -2530,6 +2530,17 @@ radv_get_wave_size(struct radv_device *device,
|
|||||||
return device->physical_device->ge_wave_size;
|
return device->physical_device->ge_wave_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static uint8_t
|
||||||
|
radv_get_ballot_bit_size(struct radv_device *device,
|
||||||
|
const VkPipelineShaderStageCreateInfo *pStage,
|
||||||
|
gl_shader_stage stage,
|
||||||
|
const struct radv_shader_variant_key *key)
|
||||||
|
{
|
||||||
|
if (stage == MESA_SHADER_COMPUTE && key->cs.subgroup_size)
|
||||||
|
return key->cs.subgroup_size;
|
||||||
|
return 64;
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
radv_fill_shader_info(struct radv_pipeline *pipeline,
|
radv_fill_shader_info(struct radv_pipeline *pipeline,
|
||||||
const VkPipelineShaderStageCreateInfo **pStages,
|
const VkPipelineShaderStageCreateInfo **pStages,
|
||||||
@@ -2642,10 +2653,15 @@ radv_fill_shader_info(struct radv_pipeline *pipeline,
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
|
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
|
||||||
if (nir[i])
|
if (nir[i]) {
|
||||||
infos[i].wave_size =
|
infos[i].wave_size =
|
||||||
radv_get_wave_size(pipeline->device, pStages[i],
|
radv_get_wave_size(pipeline->device, pStages[i],
|
||||||
i, &keys[i]);
|
i, &keys[i]);
|
||||||
|
infos[i].ballot_bit_size =
|
||||||
|
radv_get_ballot_bit_size(pipeline->device,
|
||||||
|
pStages[i], i,
|
||||||
|
&keys[i]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2788,7 +2804,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
|
|||||||
|
|
||||||
for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
|
for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
|
||||||
const VkPipelineShaderStageCreateInfo *stage = pStages[i];
|
const VkPipelineShaderStageCreateInfo *stage = pStages[i];
|
||||||
unsigned subgroup_size = 64;
|
unsigned subgroup_size = 64, ballot_bit_size = 64;
|
||||||
|
|
||||||
if (!modules[i])
|
if (!modules[i])
|
||||||
continue;
|
continue;
|
||||||
@@ -2802,13 +2818,14 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
|
|||||||
assert(device->physical_device->rad_info.chip_class >= GFX10 &&
|
assert(device->physical_device->rad_info.chip_class >= GFX10 &&
|
||||||
i == MESA_SHADER_COMPUTE);
|
i == MESA_SHADER_COMPUTE);
|
||||||
subgroup_size = key->compute_subgroup_size;
|
subgroup_size = key->compute_subgroup_size;
|
||||||
|
ballot_bit_size = key->compute_subgroup_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
nir[i] = radv_shader_compile_to_nir(device, modules[i],
|
nir[i] = radv_shader_compile_to_nir(device, modules[i],
|
||||||
stage ? stage->pName : "main", i,
|
stage ? stage->pName : "main", i,
|
||||||
stage ? stage->pSpecializationInfo : NULL,
|
stage ? stage->pSpecializationInfo : NULL,
|
||||||
flags, pipeline->layout,
|
flags, pipeline->layout,
|
||||||
subgroup_size);
|
subgroup_size, ballot_bit_size);
|
||||||
|
|
||||||
/* We don't want to alter meta shaders IR directly so clone it
|
/* We don't want to alter meta shaders IR directly so clone it
|
||||||
* first.
|
* first.
|
||||||
@@ -2888,6 +2905,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
|
|||||||
pipeline->layout, &key,
|
pipeline->layout, &key,
|
||||||
&info);
|
&info);
|
||||||
info.wave_size = 64; /* Wave32 not supported. */
|
info.wave_size = 64; /* Wave32 not supported. */
|
||||||
|
info.ballot_bit_size = 64;
|
||||||
|
|
||||||
pipeline->gs_copy_shader = radv_create_gs_copy_shader(
|
pipeline->gs_copy_shader = radv_create_gs_copy_shader(
|
||||||
device, nir[MESA_SHADER_GEOMETRY], &info,
|
device, nir[MESA_SHADER_GEOMETRY], &info,
|
||||||
|
@@ -293,7 +293,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
|
|||||||
const VkSpecializationInfo *spec_info,
|
const VkSpecializationInfo *spec_info,
|
||||||
const VkPipelineCreateFlags flags,
|
const VkPipelineCreateFlags flags,
|
||||||
const struct radv_pipeline_layout *layout,
|
const struct radv_pipeline_layout *layout,
|
||||||
unsigned subgroup_size)
|
unsigned subgroup_size, unsigned ballot_bit_size)
|
||||||
{
|
{
|
||||||
nir_shader *nir;
|
nir_shader *nir;
|
||||||
const nir_shader_compiler_options *nir_options =
|
const nir_shader_compiler_options *nir_options =
|
||||||
@@ -483,7 +483,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
|
|||||||
bool gfx7minus = device->physical_device->rad_info.chip_class <= GFX7;
|
bool gfx7minus = device->physical_device->rad_info.chip_class <= GFX7;
|
||||||
nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) {
|
nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) {
|
||||||
.subgroup_size = subgroup_size,
|
.subgroup_size = subgroup_size,
|
||||||
.ballot_bit_size = 64,
|
.ballot_bit_size = ballot_bit_size,
|
||||||
.lower_to_scalar = 1,
|
.lower_to_scalar = 1,
|
||||||
.lower_subgroup_masks = 1,
|
.lower_subgroup_masks = 1,
|
||||||
.lower_shuffle = 1,
|
.lower_shuffle = 1,
|
||||||
|
@@ -236,6 +236,7 @@ struct radv_shader_info {
|
|||||||
bool uses_invocation_id;
|
bool uses_invocation_id;
|
||||||
bool uses_prim_id;
|
bool uses_prim_id;
|
||||||
uint8_t wave_size;
|
uint8_t wave_size;
|
||||||
|
uint8_t ballot_bit_size;
|
||||||
struct radv_userdata_locations user_sgprs_locs;
|
struct radv_userdata_locations user_sgprs_locs;
|
||||||
unsigned num_user_sgprs;
|
unsigned num_user_sgprs;
|
||||||
unsigned num_input_sgprs;
|
unsigned num_input_sgprs;
|
||||||
@@ -404,7 +405,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
|
|||||||
const VkSpecializationInfo *spec_info,
|
const VkSpecializationInfo *spec_info,
|
||||||
const VkPipelineCreateFlags flags,
|
const VkPipelineCreateFlags flags,
|
||||||
const struct radv_pipeline_layout *layout,
|
const struct radv_pipeline_layout *layout,
|
||||||
unsigned subgroup_size);
|
unsigned subgroup_size, unsigned ballot_bit_size);
|
||||||
|
|
||||||
void *
|
void *
|
||||||
radv_alloc_shader_memory(struct radv_device *device,
|
radv_alloc_shader_memory(struct radv_device *device,
|
||||||
|
Reference in New Issue
Block a user