dzn: Add a stencil blit fallback

Not all D3D hardware supports specifying the stencil ref value from the
pixel shader. For such hardware, stencil blits need to do the awful
8-pass one-bit-per-pass discard-to-not-write blit approach.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27313>
This commit is contained in:
Jesse Natalie
2024-01-26 10:56:55 -08:00
committed by Marge Bot
parent 5cdcb7134a
commit fd89ca82a6
5 changed files with 105 additions and 30 deletions

View File

@@ -2739,7 +2739,8 @@ static void
dzn_cmd_buffer_blit_prepare_dst_view(struct dzn_cmd_buffer *cmdbuf,
struct dzn_image *img,
VkImageAspectFlagBits aspect,
uint32_t level, uint32_t layer)
uint32_t level, uint32_t layer,
const VkOffset3D *dst_offsets)
{
bool ds = aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
VkImageSubresourceRange range = {
@@ -2754,6 +2755,19 @@ dzn_cmd_buffer_blit_prepare_dst_view(struct dzn_cmd_buffer *cmdbuf,
D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(img, &range, 0);
D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &desc);
ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 0, NULL, true, &handle);
if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
const struct dzn_physical_device *pdev = container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
if (!pdev->options.PSSpecifiedStencilRefSupported) {
D3D12_RECT clear_rect = {
.left = dst_offsets[0].x,
.right = dst_offsets[1].x,
.top = dst_offsets[0].y,
.bottom = dst_offsets[1].y,
};
ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist, handle, D3D12_CLEAR_FLAG_STENCIL, 0.f, 0, 1, &clear_rect);
}
}
} else {
D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(img, &range, 0);
D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, img, &desc);
@@ -2767,10 +2781,12 @@ dzn_cmd_buffer_blit_set_pipeline(struct dzn_cmd_buffer *cmdbuf,
const struct dzn_image *dst,
VkImageAspectFlagBits aspect,
VkFilter filter,
enum dzn_blit_resolve_mode resolve_mode)
enum dzn_blit_resolve_mode resolve_mode,
uint32_t stencil_bit)
{
struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
assert(pdev->options.PSSpecifiedStencilRefSupported || aspect != VK_IMAGE_ASPECT_STENCIL_BIT || stencil_bit != 0xf);
enum pipe_format pfmt = vk_format_to_pipe_format(dst->vk.format);
VkImageUsageFlags usage =
vk_format_is_depth_or_stencil(dst->vk.format) ?
@@ -2795,6 +2811,7 @@ dzn_cmd_buffer_blit_set_pipeline(struct dzn_cmd_buffer *cmdbuf,
.src_is_array = src->vk.array_layers > 1,
.resolve_mode = resolve_mode,
.linear_filter = filter == VK_FILTER_LINEAR,
.stencil_bit = stencil_bit,
.padding = 0,
};
@@ -2803,8 +2820,10 @@ dzn_cmd_buffer_blit_set_pipeline(struct dzn_cmd_buffer *cmdbuf,
assert(ctx);
cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].root_sig = NULL;
ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, ctx->root_sig);
if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].root_sig != ctx->root_sig) {
cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].root_sig = ctx->root_sig;
ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, ctx->root_sig);
}
ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, ctx->pipeline_state);
}
@@ -2954,10 +2973,13 @@ dzn_cmd_buffer_blit_region(struct dzn_cmd_buffer *cmdbuf,
const VkImageBlit2 *region = &info->pRegions[r];
bool src_is_3d = src->vk.image_type == VK_IMAGE_TYPE_3D;
bool dst_is_3d = dst->vk.image_type == VK_IMAGE_TYPE_3D;
const struct dzn_physical_device *pdev = container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
bool support_stencil_blit = pdev->options.PSSpecifiedStencilRefSupported;
uint32_t stencil_bit = support_stencil_blit ? 0xf : 0;
dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
D3D12_BARRIER_LAYOUT restore_dst_layout = D3D12_BARRIER_LAYOUT_COMMON;
dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, dzn_blit_resolve_none);
dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, dzn_blit_resolve_none, stencil_bit);
dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
src, info->srcImageLayout, &region->srcSubresource,
dst, info->dstImageLayout, &region->dstSubresource,
@@ -3003,9 +3025,19 @@ dzn_cmd_buffer_blit_region(struct dzn_cmd_buffer *cmdbuf,
}
for (uint32_t slice = 0; slice < slice_count; slice++) {
dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, dst, aspect, dst_level, dst_z_coord);
dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, dst, aspect, dst_level, dst_z_coord, region->dstOffsets);
ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16);
ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT && !support_stencil_blit) {
cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist, 0xff);
for (stencil_bit = 0; stencil_bit < 8; ++stencil_bit) {
dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, dzn_blit_resolve_none, stencil_bit);
ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstant(cmdbuf->cmdlist, 2, (1 << stencil_bit), 0);
ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
}
} else {
ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
}
src_z_coord += src_slice_step;
dst_z_coord += dst_slice_step;
}
@@ -3042,9 +3074,14 @@ dzn_cmd_buffer_resolve_region(struct dzn_cmd_buffer *cmdbuf,
const VkImageResolve2 *region = &info->pRegions[r];
const struct dzn_physical_device *pdev = container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
bool support_stencil_blit = pdev->options.PSSpecifiedStencilRefSupported;
uint32_t stencil_bit = support_stencil_blit ? 0xf : 0;
enum dzn_blit_resolve_mode resolve_mode = get_blit_resolve_mode(mode);
dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
D3D12_BARRIER_LAYOUT restore_dst_layout = D3D12_BARRIER_LAYOUT_COMMON;
dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, get_blit_resolve_mode(mode));
dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, resolve_mode, stencil_bit);
dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
src, info->srcImageLayout, &region->srcSubresource,
dst, info->dstImageLayout, &region->dstSubresource,
@@ -3085,9 +3122,20 @@ dzn_cmd_buffer_resolve_region(struct dzn_cmd_buffer *cmdbuf,
dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf,
dst, aspect, region->dstSubresource.mipLevel,
region->dstSubresource.baseArrayLayer + layer);
region->dstSubresource.baseArrayLayer + layer,
dst_offset);
ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16);
ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT && !support_stencil_blit) {
cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist, 0xff);
for (stencil_bit = 0; stencil_bit < 8; ++stencil_bit) {
dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, resolve_mode, stencil_bit);
ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstant(cmdbuf->cmdlist, 2, (1 << stencil_bit), 0);
ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
}
} else {
ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
}
}
dzn_cmd_buffer_blit_issue_barriers(cmdbuf,

View File

@@ -590,17 +590,15 @@ dzn_meta_blit_create(struct dzn_device *device, const struct dzn_meta_blit_key *
},
.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX,
},
};
D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {
.Version = D3D_ROOT_SIGNATURE_VERSION_1_1,
.Desc_1_1 = {
.NumParameters = ARRAY_SIZE(root_params),
.pParameters = root_params,
.NumStaticSamplers = ARRAY_SIZE(samplers),
.pStaticSamplers = samplers,
.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE,
},
{
.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS,
.Constants = {
.ShaderRegister = 0,
.RegisterSpace = 0,
.Num32BitValues = 1,
},
.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL,
}
};
uint32_t samples = key->resolve_mode == dzn_blit_resolve_none ?
@@ -627,9 +625,25 @@ dzn_meta_blit_create(struct dzn_device *device, const struct dzn_meta_blit_key *
.sampler_dim = key->sampler_dim,
.src_is_array = key->src_is_array,
.resolve_mode = key->resolve_mode,
.stencil_fallback = key->loc == FRAG_RESULT_STENCIL && key->stencil_bit != 0xf,
.padding = 0,
};
D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {
.Version = D3D_ROOT_SIGNATURE_VERSION_1_1,
.Desc_1_1 = {
.NumParameters = ARRAY_SIZE(root_params),
.pParameters = root_params,
.NumStaticSamplers = ARRAY_SIZE(samplers),
.pStaticSamplers = samplers,
.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE,
},
};
/* Don't need fs constants unless we're doing the stencil fallback */
if (!blit_fs_info.stencil_fallback)
root_sig_desc.Desc_1_1.NumParameters--;
blit->root_sig = dzn_device_create_root_sig(device, &root_sig_desc);
if (!blit->root_sig) {
dzn_meta_blit_destroy(device, blit);
@@ -675,7 +689,7 @@ dzn_meta_blit_create(struct dzn_device *device, const struct dzn_meta_blit_key *
} else {
assert(key->loc == FRAG_RESULT_STENCIL);
desc.DepthStencilState.StencilEnable = true;
desc.DepthStencilState.StencilWriteMask = 0xff;
desc.DepthStencilState.StencilWriteMask = key->stencil_bit == 0xf ? 0xff : (1 << key->stencil_bit);
desc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_REPLACE;
desc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_REPLACE;
desc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_REPLACE;

View File

@@ -650,11 +650,13 @@ dzn_nir_blit_fs(const struct dzn_nir_blit_info *info)
uint32_t out_comps =
(info->loc == FRAG_RESULT_DEPTH || info->loc == FRAG_RESULT_STENCIL) ? 1 : 4;
nir_variable *out =
nir_variable_create(b.shader, nir_var_shader_out,
glsl_vector_type(info->out_type, out_comps),
"out");
out->data.location = info->loc;
nir_variable *out = NULL;
if (!info->stencil_fallback) {
out = nir_variable_create(b.shader, nir_var_shader_out,
glsl_vector_type(info->out_type, out_comps),
"out");
out->data.location = info->loc;
}
nir_def *res = NULL;
@@ -771,7 +773,16 @@ dzn_nir_blit_fs(const struct dzn_nir_blit_info *info)
res = &tex->def;
}
nir_store_var(&b, out, nir_trim_vector(&b, res, out_comps), 0xf);
if (info->stencil_fallback) {
nir_def *mask_desc =
dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "mask", 0);
nir_def *mask = nir_load_ubo(&b, 1, 32, mask_desc, nir_imm_int(&b, 0),
.align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0);
nir_def *fail = nir_ieq_imm(&b, nir_iand(&b, nir_channel(&b, res, 0), mask), 0);
nir_discard_if(&b, fail);
} else {
nir_store_var(&b, out, nir_trim_vector(&b, res, out_comps), 0xf);
}
return b.shader;
}

View File

@@ -153,7 +153,8 @@ struct dzn_nir_blit_info {
uint32_t sampler_dim : 4;
uint32_t src_is_array : 1;
uint32_t resolve_mode : 3;
uint32_t padding : 10;
uint32_t stencil_fallback : 1;
uint32_t padding : 9;
};
const uint32_t hash_key;
};

View File

@@ -159,7 +159,8 @@ struct dzn_meta_blit_key {
uint32_t src_is_array : 1;
uint32_t resolve_mode : 3;
uint32_t linear_filter : 1;
uint32_t padding : 9;
uint32_t stencil_bit : 4;
uint32_t padding : 5;
};
const uint64_t u64;
};