From 74658b01d2281c34988d589a2ac1861cbbba6b96 Mon Sep 17 00:00:00 2001 From: Illia Polishchuk Date: Tue, 13 Sep 2022 12:22:33 +0300 Subject: [PATCH] driconf/Intel: Add lower_depth_range_rate option workaround for Homerun Clash misrendering issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Intel has different Z interpolation floating-point rounding than other Mesa GPUs. For example, gl_Position.z = 0.0 will be interpolated to gl_FragCoord.z = 0.5 on all GPUs, but gl_Position.z = -0.00000001 will be interpolated to gl_FragCoord.z = 0.4999999702 on Intel and rounded to gl_FragCoord.z = 0.5 on other GPUs. Games using the LEQUAL depth func will fail the depth test on Intel and pass it on other GPUs in such a case. This workaround lowers the translated depth range so that several gl_FragCoord.z values differing by an extremely small amount are translated to the same UINT16/UINT24/UINT32 value in an integer depth buffer. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7199 Signed-off-by: Illia Polishchuk Reviewed-by: Tapani Pälli Part-of: --- src/gallium/drivers/crocus/crocus_screen.c | 2 ++ src/gallium/drivers/crocus/crocus_screen.h | 1 + src/gallium/drivers/crocus/crocus_state.c | 6 ++++++ src/gallium/drivers/crocus/driinfo_crocus.h | 4 ++++ src/gallium/drivers/iris/driinfo_iris.h | 4 ++++ src/gallium/drivers/iris/iris_screen.c | 2 ++ src/gallium/drivers/iris/iris_screen.h | 1 + src/gallium/drivers/iris/iris_state.c | 6 ++++++ src/intel/vulkan/anv_device.c | 6 ++++++ src/intel/vulkan/anv_private.h | 1 + src/intel/vulkan/genX_cmd_buffer.c | 5 +++++ src/intel/vulkan_hasvk/anv_device.c | 6 ++++++ src/intel/vulkan_hasvk/anv_private.h | 1 + src/intel/vulkan_hasvk/genX_cmd_buffer.c | 5 +++++ src/util/00-mesa-defaults.conf | 5 +++++ src/util/driconf.h | 4 ++++ 16 files changed, 59 insertions(+) diff --git a/src/gallium/drivers/crocus/crocus_screen.c b/src/gallium/drivers/crocus/crocus_screen.c index ab804d74dbc..eb4ff319818 100644 --- 
a/src/gallium/drivers/crocus/crocus_screen.c +++ b/src/gallium/drivers/crocus/crocus_screen.c @@ -765,6 +765,8 @@ crocus_screen_create(int fd, const struct pipe_screen_config *config) driQueryOptionb(config->options, "always_flush_cache"); screen->driconf.limit_trig_input_range = driQueryOptionb(config->options, "limit_trig_input_range"); + screen->driconf.lower_depth_range_rate = + driQueryOptionf(config->options, "lower_depth_range_rate"); screen->precompile = env_var_as_boolean("shader_precompile", true); diff --git a/src/gallium/drivers/crocus/crocus_screen.h b/src/gallium/drivers/crocus/crocus_screen.h index 5544134b503..f817bfa75a4 100644 --- a/src/gallium/drivers/crocus/crocus_screen.h +++ b/src/gallium/drivers/crocus/crocus_screen.h @@ -199,6 +199,7 @@ struct crocus_screen { bool disable_throttling; bool always_flush_cache; bool limit_trig_input_range; + float lower_depth_range_rate; } driconf; uint64_t aperture_bytes; diff --git a/src/gallium/drivers/crocus/crocus_state.c b/src/gallium/drivers/crocus/crocus_state.c index c884b84d466..87a70e0bebc 100644 --- a/src/gallium/drivers/crocus/crocus_state.c +++ b/src/gallium/drivers/crocus/crocus_state.c @@ -3371,9 +3371,15 @@ crocus_set_viewport_states(struct pipe_context *ctx, const struct pipe_viewport_state *states) { struct crocus_context *ice = (struct crocus_context *) ctx; + struct crocus_screen *screen = (struct crocus_screen *)ctx->screen; memcpy(&ice->state.viewports[start_slot], states, sizeof(*states) * count); + /* Fix depth test misrenderings by lowering translated depth range */ + if (screen->driconf.lower_depth_range_rate != 1.0f) + ice->state.viewports[start_slot].translate[2] *= + screen->driconf.lower_depth_range_rate; + ice->state.dirty |= CROCUS_DIRTY_SF_CL_VIEWPORT; ice->state.dirty |= CROCUS_DIRTY_RASTER; #if GFX_VER >= 6 diff --git a/src/gallium/drivers/crocus/driinfo_crocus.h b/src/gallium/drivers/crocus/driinfo_crocus.h index 71fc5a3dc4e..c81b9a45182 100644 --- 
a/src/gallium/drivers/crocus/driinfo_crocus.h +++ b/src/gallium/drivers/crocus/driinfo_crocus.h @@ -10,3 +10,7 @@ DRI_CONF_SECTION_END DRI_CONF_SECTION_PERFORMANCE DRI_CONF_OPT_E(bo_reuse, 1, 0, 1, "Buffer object reuse",) DRI_CONF_SECTION_END + +DRI_CONF_SECTION_QUALITY + DRI_CONF_PP_LOWER_DEPTH_RANGE_RATE() +DRI_CONF_SECTION_END diff --git a/src/gallium/drivers/iris/driinfo_iris.h b/src/gallium/drivers/iris/driinfo_iris.h index ff19011b954..d4d4a081b90 100644 --- a/src/gallium/drivers/iris/driinfo_iris.h +++ b/src/gallium/drivers/iris/driinfo_iris.h @@ -12,3 +12,7 @@ DRI_CONF_SECTION_PERFORMANCE DRI_CONF_ADAPTIVE_SYNC(true) DRI_CONF_OPT_E(bo_reuse, 1, 0, 1, "Buffer object reuse",) DRI_CONF_SECTION_END + +DRI_CONF_SECTION_QUALITY + DRI_CONF_PP_LOWER_DEPTH_RANGE_RATE() +DRI_CONF_SECTION_END diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c index 5bf2ec91638..c609dff1e52 100644 --- a/src/gallium/drivers/iris/iris_screen.c +++ b/src/gallium/drivers/iris/iris_screen.c @@ -845,6 +845,8 @@ iris_screen_create(int fd, const struct pipe_screen_config *config) driQueryOptionb(config->options, "sync_compile"); screen->driconf.limit_trig_input_range = driQueryOptionb(config->options, "limit_trig_input_range"); + screen->driconf.lower_depth_range_rate = + driQueryOptionf(config->options, "lower_depth_range_rate"); screen->precompile = env_var_as_boolean("shader_precompile", true); diff --git a/src/gallium/drivers/iris/iris_screen.h b/src/gallium/drivers/iris/iris_screen.h index f3ad63209bc..9cb24126203 100644 --- a/src/gallium/drivers/iris/iris_screen.h +++ b/src/gallium/drivers/iris/iris_screen.h @@ -182,6 +182,7 @@ struct iris_screen { bool always_flush_cache; bool sync_compile; bool limit_trig_input_range; + float lower_depth_range_rate; } driconf; /** Does the kernel support various features (KERNEL_HAS_* bitfield)? 
*/ diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 07d3c723bc9..af1c36a7939 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -3200,9 +3200,15 @@ iris_set_viewport_states(struct pipe_context *ctx, const struct pipe_viewport_state *states) { struct iris_context *ice = (struct iris_context *) ctx; + struct iris_screen *screen = (struct iris_screen *)ctx->screen; memcpy(&ice->state.viewports[start_slot], states, sizeof(*states) * count); + /* Fix depth test misrenderings by lowering translated depth range */ + if (screen->driconf.lower_depth_range_rate != 1.0f) + ice->state.viewports[start_slot].translate[2] *= + screen->driconf.lower_depth_range_rate; + ice->state.dirty |= IRIS_DIRTY_SF_CL_VIEWPORT; if (ice->state.cso_rast && (!ice->state.cso_rast->depth_clip_near || diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 87760eb9c52..5798d39ff2d 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -77,6 +77,10 @@ static const driOptionDescription anv_dri_options[] = { DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false) DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false) DRI_CONF_SECTION_END + + DRI_CONF_SECTION_QUALITY + DRI_CONF_PP_LOWER_DEPTH_RANGE_RATE() + DRI_CONF_SECTION_END }; /* This is probably far to big but it reflects the max size used for messages @@ -1058,6 +1062,8 @@ anv_init_dri_options(struct anv_instance *instance) driQueryOptionb(&instance->dri_options, "limit_trig_input_range"); instance->sample_mask_out_opengl_behaviour = driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour"); + instance->lower_depth_range_rate = + driQueryOptionf(&instance->dri_options, "lower_depth_range_rate"); } VkResult anv_CreateInstance( diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index d8f140ac2ad..4f3eacc9471 100644 --- a/src/intel/vulkan/anv_private.h +++ 
b/src/intel/vulkan/anv_private.h @@ -997,6 +997,7 @@ struct anv_instance { bool assume_full_subgroups; bool limit_trig_input_range; bool sample_mask_out_opengl_behaviour; + float lower_depth_range_rate; }; VkResult anv_init_wsi(struct anv_physical_device *physical_device); diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index cf8f0d66183..5d3f3619cf0 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -3150,6 +3150,7 @@ cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer) static void cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) { + struct anv_instance *instance = cmd_buffer->device->physical->instance; struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx; const struct vk_dynamic_graphics_state *dyn = &cmd_buffer->vk.dynamic_graphics_state; @@ -3186,6 +3187,10 @@ cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) .YMaxViewPort = MAX2(vp->y, vp->y + vp->height) - 1, }; + /* Fix depth test misrenderings by lowering translated depth range */ + if (instance->lower_depth_range_rate != 1.0f) + sfv.ViewportMatrixElementm32 *= instance->lower_depth_range_rate; + const uint32_t fb_size_max = 1 << 14; uint32_t x_min = 0, x_max = fb_size_max; uint32_t y_min = 0, y_max = fb_size_max; diff --git a/src/intel/vulkan_hasvk/anv_device.c b/src/intel/vulkan_hasvk/anv_device.c index e6b5f136262..e5ce73f140e 100644 --- a/src/intel/vulkan_hasvk/anv_device.c +++ b/src/intel/vulkan_hasvk/anv_device.c @@ -77,6 +77,10 @@ static const driOptionDescription anv_dri_options[] = { DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false) DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false) DRI_CONF_SECTION_END + + DRI_CONF_SECTION_QUALITY + DRI_CONF_PP_LOWER_DEPTH_RANGE_RATE() + DRI_CONF_SECTION_END }; /* This is probably far to big but it reflects the max size used for messages @@ -1102,6 +1106,8 @@ anv_init_dri_options(struct anv_instance *instance) driQueryOptionb(&instance->dri_options, 
"limit_trig_input_range"); instance->sample_mask_out_opengl_behaviour = driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour"); + instance->lower_depth_range_rate = + driQueryOptionf(&instance->dri_options, "lower_depth_range_rate"); } VkResult anv_CreateInstance( diff --git a/src/intel/vulkan_hasvk/anv_private.h b/src/intel/vulkan_hasvk/anv_private.h index c0bd2626d10..ab5e4cc80c5 100644 --- a/src/intel/vulkan_hasvk/anv_private.h +++ b/src/intel/vulkan_hasvk/anv_private.h @@ -1085,6 +1085,7 @@ struct anv_instance { bool assume_full_subgroups; bool limit_trig_input_range; bool sample_mask_out_opengl_behaviour; + float lower_depth_range_rate; }; VkResult anv_init_wsi(struct anv_physical_device *physical_device); diff --git a/src/intel/vulkan_hasvk/genX_cmd_buffer.c b/src/intel/vulkan_hasvk/genX_cmd_buffer.c index e71f7075690..9cef1d68bc8 100644 --- a/src/intel/vulkan_hasvk/genX_cmd_buffer.c +++ b/src/intel/vulkan_hasvk/genX_cmd_buffer.c @@ -3455,6 +3455,7 @@ cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer) static void cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) { + struct anv_instance *instance = cmd_buffer->device->physical->instance; struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx; const struct vk_dynamic_graphics_state *dyn = &cmd_buffer->vk.dynamic_graphics_state; @@ -3493,6 +3494,10 @@ cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) #endif }; + /* Fix depth test misrenderings by lowering translated depth range */ + if (instance->lower_depth_range_rate != 1.0f) + sfv.ViewportMatrixElementm32 *= instance->lower_depth_range_rate; + const uint32_t fb_size_max = 1 << 14; uint32_t x_min = 0, x_max = fb_size_max; uint32_t y_min = 0, y_max = fb_size_max; diff --git a/src/util/00-mesa-defaults.conf b/src/util/00-mesa-defaults.conf index 293971b0ed1..c0da184bfb7 100644 --- a/src/util/00-mesa-defaults.conf +++ b/src/util/00-mesa-defaults.conf @@ -322,6 +322,11 @@ TODO: document the other workarounds.