From fd89ca82a6394e6b3541f368a5abf53cddeb3d32 Mon Sep 17 00:00:00 2001 From: Jesse Natalie Date: Fri, 26 Jan 2024 10:56:55 -0800 Subject: [PATCH] dzn: Add a stencil blit fallback Not all D3D hardware supports specifying the stencil ref value from the pixel shader. For such hardware, stencil blits need to do the awful 8-pass one-bit-per-pass discard-to-not-write blit approach. Part-of: --- src/microsoft/vulkan/dzn_cmd_buffer.c | 68 +++++++++++++++++++++++---- src/microsoft/vulkan/dzn_meta.c | 38 ++++++++++----- src/microsoft/vulkan/dzn_nir.c | 23 ++++++--- src/microsoft/vulkan/dzn_nir.h | 3 +- src/microsoft/vulkan/dzn_private.h | 3 +- 5 files changed, 105 insertions(+), 30 deletions(-) diff --git a/src/microsoft/vulkan/dzn_cmd_buffer.c b/src/microsoft/vulkan/dzn_cmd_buffer.c index ea624201e1a..15f0a12b27b 100644 --- a/src/microsoft/vulkan/dzn_cmd_buffer.c +++ b/src/microsoft/vulkan/dzn_cmd_buffer.c @@ -2739,7 +2739,8 @@ static void dzn_cmd_buffer_blit_prepare_dst_view(struct dzn_cmd_buffer *cmdbuf, struct dzn_image *img, VkImageAspectFlagBits aspect, - uint32_t level, uint32_t layer) + uint32_t level, uint32_t layer, + const VkOffset3D *dst_offsets) { bool ds = aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); VkImageSubresourceRange range = { @@ -2754,6 +2755,19 @@ dzn_cmd_buffer_blit_prepare_dst_view(struct dzn_cmd_buffer *cmdbuf, D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(img, &range, 0); D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &desc); ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 0, NULL, true, &handle); + + if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) { + const struct dzn_physical_device *pdev = container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk); + if (!pdev->options.PSSpecifiedStencilRefSupported) { + D3D12_RECT clear_rect = { + .left = dst_offsets[0].x, + .right = dst_offsets[1].x, + .top = dst_offsets[0].y, + .bottom = dst_offsets[1].y, + }; + 
ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist, handle, D3D12_CLEAR_FLAG_STENCIL, 0.f, 0, 1, &clear_rect); + } + } } else { D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(img, &range, 0); D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, img, &desc); @@ -2767,10 +2781,12 @@ dzn_cmd_buffer_blit_set_pipeline(struct dzn_cmd_buffer *cmdbuf, const struct dzn_image *dst, VkImageAspectFlagBits aspect, VkFilter filter, - enum dzn_blit_resolve_mode resolve_mode) + enum dzn_blit_resolve_mode resolve_mode, + uint32_t stencil_bit) { struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk); + assert(pdev->options.PSSpecifiedStencilRefSupported || aspect != VK_IMAGE_ASPECT_STENCIL_BIT || stencil_bit != 0xf); enum pipe_format pfmt = vk_format_to_pipe_format(dst->vk.format); VkImageUsageFlags usage = vk_format_is_depth_or_stencil(dst->vk.format) ? 
@@ -2795,6 +2811,7 @@ dzn_cmd_buffer_blit_set_pipeline(struct dzn_cmd_buffer *cmdbuf, .src_is_array = src->vk.array_layers > 1, .resolve_mode = resolve_mode, .linear_filter = filter == VK_FILTER_LINEAR, + .stencil_bit = stencil_bit, .padding = 0, }; @@ -2803,8 +2820,10 @@ dzn_cmd_buffer_blit_set_pipeline(struct dzn_cmd_buffer *cmdbuf, assert(ctx); cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE; - cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].root_sig = NULL; - ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, ctx->root_sig); + if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].root_sig != ctx->root_sig) { + cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].root_sig = ctx->root_sig; + ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, ctx->root_sig); + } ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, ctx->pipeline_state); } @@ -2954,10 +2973,13 @@ dzn_cmd_buffer_blit_region(struct dzn_cmd_buffer *cmdbuf, const VkImageBlit2 *region = &info->pRegions[r]; bool src_is_3d = src->vk.image_type == VK_IMAGE_TYPE_3D; bool dst_is_3d = dst->vk.image_type == VK_IMAGE_TYPE_3D; + const struct dzn_physical_device *pdev = container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk); + bool support_stencil_blit = pdev->options.PSSpecifiedStencilRefSupported; + uint32_t stencil_bit = support_stencil_blit ? 
0xf : 0; dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) { D3D12_BARRIER_LAYOUT restore_dst_layout = D3D12_BARRIER_LAYOUT_COMMON; - dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, dzn_blit_resolve_none); + dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, dzn_blit_resolve_none, stencil_bit); dzn_cmd_buffer_blit_issue_barriers(cmdbuf, src, info->srcImageLayout, &region->srcSubresource, dst, info->dstImageLayout, &region->dstSubresource, @@ -3003,9 +3025,19 @@ dzn_cmd_buffer_blit_region(struct dzn_cmd_buffer *cmdbuf, } for (uint32_t slice = 0; slice < slice_count; slice++) { - dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, dst, aspect, dst_level, dst_z_coord); + dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, dst, aspect, dst_level, dst_z_coord, region->dstOffsets); ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16); - ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0); + if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT && !support_stencil_blit) { + cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF; + ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist, 0xff); + for (stencil_bit = 0; stencil_bit < 8; ++stencil_bit) { + dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, dzn_blit_resolve_none, stencil_bit); + ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstant(cmdbuf->cmdlist, 2, (1 << stencil_bit), 0); + ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0); + } + } else { + ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0); + } src_z_coord += src_slice_step; dst_z_coord += dst_slice_step; } @@ -3042,9 +3074,14 @@ dzn_cmd_buffer_resolve_region(struct dzn_cmd_buffer *cmdbuf, const VkImageResolve2 *region = &info->pRegions[r]; + const struct dzn_physical_device *pdev = container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk); + bool support_stencil_blit = 
pdev->options.PSSpecifiedStencilRefSupported; + uint32_t stencil_bit = support_stencil_blit ? 0xf : 0; + enum dzn_blit_resolve_mode resolve_mode = get_blit_resolve_mode(mode); + dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) { D3D12_BARRIER_LAYOUT restore_dst_layout = D3D12_BARRIER_LAYOUT_COMMON; - dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, get_blit_resolve_mode(mode)); + dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, resolve_mode, stencil_bit); dzn_cmd_buffer_blit_issue_barriers(cmdbuf, src, info->srcImageLayout, &region->srcSubresource, dst, info->dstImageLayout, &region->dstSubresource, @@ -3085,9 +3122,20 @@ dzn_cmd_buffer_resolve_region(struct dzn_cmd_buffer *cmdbuf, dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, dst, aspect, region->dstSubresource.mipLevel, - region->dstSubresource.baseArrayLayer + layer); + region->dstSubresource.baseArrayLayer + layer, + dst_offset); ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16); - ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0); + if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT && !support_stencil_blit) { + cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF; + ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist8, 0xff); + for (stencil_bit = 0; stencil_bit < 8; ++stencil_bit) { + dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, resolve_mode, stencil_bit); + ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstant(cmdbuf->cmdlist, 2, (1 << stencil_bit), 0); + ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0); + } + } else { + ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0); + } } dzn_cmd_buffer_blit_issue_barriers(cmdbuf, diff --git a/src/microsoft/vulkan/dzn_meta.c b/src/microsoft/vulkan/dzn_meta.c index 0114e545182..9d1b4e65d1e 100644 --- a/src/microsoft/vulkan/dzn_meta.c +++ b/src/microsoft/vulkan/dzn_meta.c 
@@ -590,17 +590,15 @@ dzn_meta_blit_create(struct dzn_device *device, const struct dzn_meta_blit_key * }, .ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX, }, - }; - - D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = { - .Version = D3D_ROOT_SIGNATURE_VERSION_1_1, - .Desc_1_1 = { - .NumParameters = ARRAY_SIZE(root_params), - .pParameters = root_params, - .NumStaticSamplers = ARRAY_SIZE(samplers), - .pStaticSamplers = samplers, - .Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE, - }, + { + .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS, + .Constants = { + .ShaderRegister = 0, + .RegisterSpace = 0, + .Num32BitValues = 1, + }, + .ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL, + } }; uint32_t samples = key->resolve_mode == dzn_blit_resolve_none ? @@ -627,9 +625,25 @@ dzn_meta_blit_create(struct dzn_device *device, const struct dzn_meta_blit_key * .sampler_dim = key->sampler_dim, .src_is_array = key->src_is_array, .resolve_mode = key->resolve_mode, + .stencil_fallback = key->loc == FRAG_RESULT_STENCIL && key->stencil_bit != 0xf, .padding = 0, }; + D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = { + .Version = D3D_ROOT_SIGNATURE_VERSION_1_1, + .Desc_1_1 = { + .NumParameters = ARRAY_SIZE(root_params), + .pParameters = root_params, + .NumStaticSamplers = ARRAY_SIZE(samplers), + .pStaticSamplers = samplers, + .Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE, + }, + }; + + /* Don't need fs constants unless we're doing the stencil fallback */ + if (!blit_fs_info.stencil_fallback) + root_sig_desc.Desc_1_1.NumParameters--; + blit->root_sig = dzn_device_create_root_sig(device, &root_sig_desc); if (!blit->root_sig) { dzn_meta_blit_destroy(device, blit); @@ -675,7 +689,7 @@ dzn_meta_blit_create(struct dzn_device *device, const struct dzn_meta_blit_key * } else { assert(key->loc == FRAG_RESULT_STENCIL); desc.DepthStencilState.StencilEnable = true; - desc.DepthStencilState.StencilWriteMask = 0xff; + desc.DepthStencilState.StencilWriteMask = key->stencil_bit == 0xf ? 
0xff : (1 << key->stencil_bit); desc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_REPLACE; desc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_REPLACE; desc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_REPLACE; diff --git a/src/microsoft/vulkan/dzn_nir.c b/src/microsoft/vulkan/dzn_nir.c index 667ec31fc7f..ba03f2bb7e8 100644 --- a/src/microsoft/vulkan/dzn_nir.c +++ b/src/microsoft/vulkan/dzn_nir.c @@ -650,11 +650,13 @@ dzn_nir_blit_fs(const struct dzn_nir_blit_info *info) uint32_t out_comps = (info->loc == FRAG_RESULT_DEPTH || info->loc == FRAG_RESULT_STENCIL) ? 1 : 4; - nir_variable *out = - nir_variable_create(b.shader, nir_var_shader_out, - glsl_vector_type(info->out_type, out_comps), - "out"); - out->data.location = info->loc; + nir_variable *out = NULL; + if (!info->stencil_fallback) { + out = nir_variable_create(b.shader, nir_var_shader_out, + glsl_vector_type(info->out_type, out_comps), + "out"); + out->data.location = info->loc; + } nir_def *res = NULL; @@ -771,7 +773,16 @@ dzn_nir_blit_fs(const struct dzn_nir_blit_info *info) res = &tex->def; } - nir_store_var(&b, out, nir_trim_vector(&b, res, out_comps), 0xf); + if (info->stencil_fallback) { + nir_def *mask_desc = + dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "mask", 0); + nir_def *mask = nir_load_ubo(&b, 1, 32, mask_desc, nir_imm_int(&b, 0), + .align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0); + nir_def *fail = nir_ieq_imm(&b, nir_iand(&b, nir_channel(&b, res, 0), mask), 0); + nir_discard_if(&b, fail); + } else { + nir_store_var(&b, out, nir_trim_vector(&b, res, out_comps), 0xf); + } return b.shader; } diff --git a/src/microsoft/vulkan/dzn_nir.h b/src/microsoft/vulkan/dzn_nir.h index c236e0fd0fd..0fb33a175f8 100644 --- a/src/microsoft/vulkan/dzn_nir.h +++ b/src/microsoft/vulkan/dzn_nir.h @@ -153,7 +153,8 @@ struct dzn_nir_blit_info { uint32_t sampler_dim : 4; uint32_t src_is_array : 1; uint32_t resolve_mode : 3; - uint32_t padding : 
10; + uint32_t stencil_fallback : 1; + uint32_t padding : 9; }; const uint32_t hash_key; }; diff --git a/src/microsoft/vulkan/dzn_private.h b/src/microsoft/vulkan/dzn_private.h index 8742dbd55f3..47d5a351943 100644 --- a/src/microsoft/vulkan/dzn_private.h +++ b/src/microsoft/vulkan/dzn_private.h @@ -159,7 +159,8 @@ struct dzn_meta_blit_key { uint32_t src_is_array : 1; uint32_t resolve_mode : 3; uint32_t linear_filter : 1; - uint32_t padding : 9; + uint32_t stencil_bit : 4; + uint32_t padding : 5; }; const uint64_t u64; };