anv: workaround apps that assume full subgroups without specifying it
Without this we might choose 8 or 16 width, while the app assumes 32. With subgroup operations it may cause wrong calculations and thus bugs. Examples of such games are Aperture Desk Job and DOOM Eternal. v2: Make it a driconf option instead of applying unconditionally, move from brw_required_dispatch_width to brw_compile_cs v3: Rename allow_assuming_full_subgroups -> assume_full_subgroups. Include assume_full_subgroups value in anv_pipeline_hash_compute(). v4: Move actual workaround code from brw_fs.c -> anv_pipeline.c. Cc: mesa-stable Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6171 Signed-off-by: Sviatoslav Peleshko <sviatoslav.peleshko@globallogic.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Marcin Ślusarz <marcin.slusarz@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15708>
This commit is contained in:

committed by
Marge Bot

parent
f97d82c52b
commit
28ca5636f6
@@ -68,6 +68,7 @@ static const driOptionDescription anv_dri_options[] = {
|
|||||||
DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
|
DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
|
||||||
DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
|
DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
|
||||||
DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
|
DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
|
||||||
|
DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(false)
|
||||||
DRI_CONF_SECTION_END
|
DRI_CONF_SECTION_END
|
||||||
|
|
||||||
DRI_CONF_SECTION_DEBUG
|
DRI_CONF_SECTION_DEBUG
|
||||||
@@ -1100,6 +1101,9 @@ anv_init_dri_options(struct anv_instance *instance)
|
|||||||
instance->vk.app_info.app_version,
|
instance->vk.app_info.app_version,
|
||||||
instance->vk.app_info.engine_name,
|
instance->vk.app_info.engine_name,
|
||||||
instance->vk.app_info.engine_version);
|
instance->vk.app_info.engine_version);
|
||||||
|
|
||||||
|
instance->assume_full_subgroups =
|
||||||
|
driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups");
|
||||||
}
|
}
|
||||||
|
|
||||||
VkResult anv_CreateInstance(
|
VkResult anv_CreateInstance(
|
||||||
|
@@ -640,9 +640,14 @@ anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
|
|||||||
if (layout)
|
if (layout)
|
||||||
_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
|
_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
|
||||||
|
|
||||||
const bool rba = pipeline->base.device->robust_buffer_access;
|
const struct anv_device *device = pipeline->base.device;
|
||||||
|
|
||||||
|
const bool rba = device->robust_buffer_access;
|
||||||
_mesa_sha1_update(&ctx, &rba, sizeof(rba));
|
_mesa_sha1_update(&ctx, &rba, sizeof(rba));
|
||||||
|
|
||||||
|
const bool afs = device->physical->instance->assume_full_subgroups;
|
||||||
|
_mesa_sha1_update(&ctx, &afs, sizeof(afs));
|
||||||
|
|
||||||
_mesa_sha1_update(&ctx, stage->shader_sha1,
|
_mesa_sha1_update(&ctx, stage->shader_sha1,
|
||||||
sizeof(stage->shader_sha1));
|
sizeof(stage->shader_sha1));
|
||||||
_mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));
|
_mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));
|
||||||
@@ -1915,7 +1920,8 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
|
|||||||
};
|
};
|
||||||
int64_t pipeline_start = os_time_get_nano();
|
int64_t pipeline_start = os_time_get_nano();
|
||||||
|
|
||||||
const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
|
struct anv_device *device = pipeline->base.device;
|
||||||
|
const struct brw_compiler *compiler = device->physical->compiler;
|
||||||
|
|
||||||
struct anv_pipeline_stage stage = {
|
struct anv_pipeline_stage stage = {
|
||||||
.stage = MESA_SHADER_COMPUTE,
|
.stage = MESA_SHADER_COMPUTE,
|
||||||
@@ -1944,8 +1950,8 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
|
|||||||
const enum brw_subgroup_size_type subgroup_size_type =
|
const enum brw_subgroup_size_type subgroup_size_type =
|
||||||
anv_subgroup_size_type(MESA_SHADER_COMPUTE, stage.module, info->stage.flags, rss_info);
|
anv_subgroup_size_type(MESA_SHADER_COMPUTE, stage.module, info->stage.flags, rss_info);
|
||||||
|
|
||||||
populate_cs_prog_key(&pipeline->base.device->info, subgroup_size_type,
|
populate_cs_prog_key(&device->info, subgroup_size_type,
|
||||||
pipeline->base.device->robust_buffer_access,
|
device->robust_buffer_access,
|
||||||
&stage.key.cs);
|
&stage.key.cs);
|
||||||
|
|
||||||
ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
|
ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
|
||||||
@@ -1957,7 +1963,7 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
|
|||||||
|
|
||||||
bool cache_hit = false;
|
bool cache_hit = false;
|
||||||
if (!skip_cache_lookup) {
|
if (!skip_cache_lookup) {
|
||||||
bin = anv_device_search_for_kernel(pipeline->base.device, cache,
|
bin = anv_device_search_for_kernel(device, cache,
|
||||||
&stage.cache_key,
|
&stage.cache_key,
|
||||||
sizeof(stage.cache_key),
|
sizeof(stage.cache_key),
|
||||||
&cache_hit);
|
&cache_hit);
|
||||||
@@ -1992,6 +1998,21 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
|
|||||||
|
|
||||||
anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);
|
anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);
|
||||||
|
|
||||||
|
unsigned local_size = stage.nir->info.workgroup_size[0] *
|
||||||
|
stage.nir->info.workgroup_size[1] *
|
||||||
|
stage.nir->info.workgroup_size[2];
|
||||||
|
|
||||||
|
/* Games don't always request full subgroups when they should,
|
||||||
|
* which can cause bugs, as they may expect bigger size of the
|
||||||
|
* subgroup than we choose for the execution.
|
||||||
|
*/
|
||||||
|
if (device->physical->instance->assume_full_subgroups &&
|
||||||
|
stage.nir->info.cs.uses_wide_subgroup_intrinsics &&
|
||||||
|
subgroup_size_type == BRW_SUBGROUP_SIZE_API_CONSTANT &&
|
||||||
|
local_size &&
|
||||||
|
local_size % BRW_SUBGROUP_SIZE == 0)
|
||||||
|
stage.key.base.subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_32;
|
||||||
|
|
||||||
stage.num_stats = 1;
|
stage.num_stats = 1;
|
||||||
|
|
||||||
struct brw_compile_cs_params params = {
|
struct brw_compile_cs_params params = {
|
||||||
@@ -1999,7 +2020,7 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
|
|||||||
.key = &stage.key.cs,
|
.key = &stage.key.cs,
|
||||||
.prog_data = &stage.prog_data.cs,
|
.prog_data = &stage.prog_data.cs,
|
||||||
.stats = stage.stats,
|
.stats = stage.stats,
|
||||||
.log_data = pipeline->base.device,
|
.log_data = device,
|
||||||
};
|
};
|
||||||
|
|
||||||
stage.code = brw_compile_cs(compiler, mem_ctx, ¶ms);
|
stage.code = brw_compile_cs(compiler, mem_ctx, ¶ms);
|
||||||
@@ -2017,7 +2038,7 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
|
|||||||
}
|
}
|
||||||
|
|
||||||
const unsigned code_size = stage.prog_data.base.program_size;
|
const unsigned code_size = stage.prog_data.base.program_size;
|
||||||
bin = anv_device_upload_kernel(pipeline->base.device, cache,
|
bin = anv_device_upload_kernel(device, cache,
|
||||||
MESA_SHADER_COMPUTE,
|
MESA_SHADER_COMPUTE,
|
||||||
&stage.cache_key, sizeof(stage.cache_key),
|
&stage.cache_key, sizeof(stage.cache_key),
|
||||||
stage.code, code_size,
|
stage.code, code_size,
|
||||||
|
@@ -1079,6 +1079,11 @@ struct anv_instance {
|
|||||||
|
|
||||||
struct driOptionCache dri_options;
|
struct driOptionCache dri_options;
|
||||||
struct driOptionCache available_dri_options;
|
struct driOptionCache available_dri_options;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Workarounds for game bugs.
|
||||||
|
*/
|
||||||
|
bool assume_full_subgroups;
|
||||||
};
|
};
|
||||||
|
|
||||||
VkResult anv_init_wsi(struct anv_physical_device *physical_device);
|
VkResult anv_init_wsi(struct anv_physical_device *physical_device);
|
||||||
|
@@ -914,6 +914,14 @@ TODO: document the other workarounds.
|
|||||||
<option name="vs_position_always_invariant" value="true" />
|
<option name="vs_position_always_invariant" value="true" />
|
||||||
</application>
|
</application>
|
||||||
</device>
|
</device>
|
||||||
|
<device driver="anv">
|
||||||
|
<application name="Aperture Desk Job" executable="deskjob">
|
||||||
|
<option name="anv_assume_full_subgroups" value="true" />
|
||||||
|
</application>
|
||||||
|
<application name="DOOMEternal" executable="DOOMEternalx64vk.exe">
|
||||||
|
<option name="anv_assume_full_subgroups" value="true" />
|
||||||
|
</application>
|
||||||
|
</device>
|
||||||
|
|
||||||
<device driver="virtio_gpu">
|
<device driver="virtio_gpu">
|
||||||
<application name="Counter-Strike Global Offensive" executable="csgo_linux64">
|
<application name="Counter-Strike Global Offensive" executable="csgo_linux64">
|
||||||
|
@@ -576,4 +576,12 @@
|
|||||||
DRI_CONF_OPT_B(radv_disable_aniso_single_level, def, \
|
DRI_CONF_OPT_B(radv_disable_aniso_single_level, def, \
|
||||||
"Disable anisotropic filtering for single level images")
|
"Disable anisotropic filtering for single level images")
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief ANV specific configuration options
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(def) \
|
||||||
|
DRI_CONF_OPT_B(anv_assume_full_subgroups, def, \
|
||||||
|
"Allow assuming full subgroups requirement even when it's not specified explicitly")
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Reference in New Issue
Block a user