dzn: Add a stencil blit fallback
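Not all D3D hardware supports specifying the stencil reference value from the pixel shader. On such hardware, stencil blits have to fall back to an (awful) 8-pass approach: the destination stencil is cleared first, then one pass is drawn per stencil bit with the stencil write mask restricted to that bit, and the pixel shader discards every fragment whose source value has that bit clear so that nothing is written for it. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27313>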
This commit is contained in:
@@ -2739,7 +2739,8 @@ static void
|
||||
dzn_cmd_buffer_blit_prepare_dst_view(struct dzn_cmd_buffer *cmdbuf,
|
||||
struct dzn_image *img,
|
||||
VkImageAspectFlagBits aspect,
|
||||
uint32_t level, uint32_t layer)
|
||||
uint32_t level, uint32_t layer,
|
||||
const VkOffset3D *dst_offsets)
|
||||
{
|
||||
bool ds = aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
|
||||
VkImageSubresourceRange range = {
|
||||
@@ -2754,6 +2755,19 @@ dzn_cmd_buffer_blit_prepare_dst_view(struct dzn_cmd_buffer *cmdbuf,
|
||||
D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(img, &range, 0);
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &desc);
|
||||
ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 0, NULL, true, &handle);
|
||||
|
||||
if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
|
||||
const struct dzn_physical_device *pdev = container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
|
||||
if (!pdev->options.PSSpecifiedStencilRefSupported) {
|
||||
D3D12_RECT clear_rect = {
|
||||
.left = dst_offsets[0].x,
|
||||
.right = dst_offsets[1].x,
|
||||
.top = dst_offsets[0].y,
|
||||
.bottom = dst_offsets[1].y,
|
||||
};
|
||||
ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist, handle, D3D12_CLEAR_FLAG_STENCIL, 0.f, 0, 1, &clear_rect);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(img, &range, 0);
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, img, &desc);
|
||||
@@ -2767,10 +2781,12 @@ dzn_cmd_buffer_blit_set_pipeline(struct dzn_cmd_buffer *cmdbuf,
|
||||
const struct dzn_image *dst,
|
||||
VkImageAspectFlagBits aspect,
|
||||
VkFilter filter,
|
||||
enum dzn_blit_resolve_mode resolve_mode)
|
||||
enum dzn_blit_resolve_mode resolve_mode,
|
||||
uint32_t stencil_bit)
|
||||
{
|
||||
struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
|
||||
struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
|
||||
assert(pdev->options.PSSpecifiedStencilRefSupported || aspect != VK_IMAGE_ASPECT_STENCIL_BIT || stencil_bit != 0xf);
|
||||
enum pipe_format pfmt = vk_format_to_pipe_format(dst->vk.format);
|
||||
VkImageUsageFlags usage =
|
||||
vk_format_is_depth_or_stencil(dst->vk.format) ?
|
||||
@@ -2795,6 +2811,7 @@ dzn_cmd_buffer_blit_set_pipeline(struct dzn_cmd_buffer *cmdbuf,
|
||||
.src_is_array = src->vk.array_layers > 1,
|
||||
.resolve_mode = resolve_mode,
|
||||
.linear_filter = filter == VK_FILTER_LINEAR,
|
||||
.stencil_bit = stencil_bit,
|
||||
.padding = 0,
|
||||
};
|
||||
|
||||
@@ -2803,8 +2820,10 @@ dzn_cmd_buffer_blit_set_pipeline(struct dzn_cmd_buffer *cmdbuf,
|
||||
assert(ctx);
|
||||
|
||||
cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
|
||||
cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].root_sig = NULL;
|
||||
ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, ctx->root_sig);
|
||||
if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].root_sig != ctx->root_sig) {
|
||||
cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].root_sig = ctx->root_sig;
|
||||
ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, ctx->root_sig);
|
||||
}
|
||||
ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, ctx->pipeline_state);
|
||||
}
|
||||
|
||||
@@ -2954,10 +2973,13 @@ dzn_cmd_buffer_blit_region(struct dzn_cmd_buffer *cmdbuf,
|
||||
const VkImageBlit2 *region = &info->pRegions[r];
|
||||
bool src_is_3d = src->vk.image_type == VK_IMAGE_TYPE_3D;
|
||||
bool dst_is_3d = dst->vk.image_type == VK_IMAGE_TYPE_3D;
|
||||
const struct dzn_physical_device *pdev = container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
|
||||
bool support_stencil_blit = pdev->options.PSSpecifiedStencilRefSupported;
|
||||
uint32_t stencil_bit = support_stencil_blit ? 0xf : 0;
|
||||
|
||||
dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
|
||||
D3D12_BARRIER_LAYOUT restore_dst_layout = D3D12_BARRIER_LAYOUT_COMMON;
|
||||
dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, dzn_blit_resolve_none);
|
||||
dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, dzn_blit_resolve_none, stencil_bit);
|
||||
dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
|
||||
src, info->srcImageLayout, &region->srcSubresource,
|
||||
dst, info->dstImageLayout, &region->dstSubresource,
|
||||
@@ -3003,9 +3025,19 @@ dzn_cmd_buffer_blit_region(struct dzn_cmd_buffer *cmdbuf,
|
||||
}
|
||||
|
||||
for (uint32_t slice = 0; slice < slice_count; slice++) {
|
||||
dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, dst, aspect, dst_level, dst_z_coord);
|
||||
dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, dst, aspect, dst_level, dst_z_coord, region->dstOffsets);
|
||||
ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16);
|
||||
ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
|
||||
if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT && !support_stencil_blit) {
|
||||
cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
|
||||
ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist, 0xff);
|
||||
for (stencil_bit = 0; stencil_bit < 8; ++stencil_bit) {
|
||||
dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, dzn_blit_resolve_none, stencil_bit);
|
||||
ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstant(cmdbuf->cmdlist, 2, (1 << stencil_bit), 0);
|
||||
ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
|
||||
}
|
||||
} else {
|
||||
ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
|
||||
}
|
||||
src_z_coord += src_slice_step;
|
||||
dst_z_coord += dst_slice_step;
|
||||
}
|
||||
@@ -3042,9 +3074,14 @@ dzn_cmd_buffer_resolve_region(struct dzn_cmd_buffer *cmdbuf,
|
||||
|
||||
const VkImageResolve2 *region = &info->pRegions[r];
|
||||
|
||||
const struct dzn_physical_device *pdev = container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
|
||||
bool support_stencil_blit = pdev->options.PSSpecifiedStencilRefSupported;
|
||||
uint32_t stencil_bit = support_stencil_blit ? 0xf : 0;
|
||||
enum dzn_blit_resolve_mode resolve_mode = get_blit_resolve_mode(mode);
|
||||
|
||||
dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
|
||||
D3D12_BARRIER_LAYOUT restore_dst_layout = D3D12_BARRIER_LAYOUT_COMMON;
|
||||
dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, get_blit_resolve_mode(mode));
|
||||
dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, resolve_mode, stencil_bit);
|
||||
dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
|
||||
src, info->srcImageLayout, &region->srcSubresource,
|
||||
dst, info->dstImageLayout, &region->dstSubresource,
|
||||
@@ -3085,9 +3122,20 @@ dzn_cmd_buffer_resolve_region(struct dzn_cmd_buffer *cmdbuf,
|
||||
|
||||
dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf,
|
||||
dst, aspect, region->dstSubresource.mipLevel,
|
||||
region->dstSubresource.baseArrayLayer + layer);
|
||||
region->dstSubresource.baseArrayLayer + layer,
|
||||
dst_offset);
|
||||
ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16);
|
||||
ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
|
||||
if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT && !support_stencil_blit) {
|
||||
cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
|
||||
ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist8, 0xff);
|
||||
for (stencil_bit = 0; stencil_bit < 8; ++stencil_bit) {
|
||||
dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, resolve_mode, stencil_bit);
|
||||
ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstant(cmdbuf->cmdlist, 2, (1 << stencil_bit), 0);
|
||||
ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
|
||||
}
|
||||
} else {
|
||||
ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
|
||||
}
|
||||
}
|
||||
|
||||
dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
|
||||
|
@@ -590,17 +590,15 @@ dzn_meta_blit_create(struct dzn_device *device, const struct dzn_meta_blit_key *
|
||||
},
|
||||
.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX,
|
||||
},
|
||||
};
|
||||
|
||||
D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {
|
||||
.Version = D3D_ROOT_SIGNATURE_VERSION_1_1,
|
||||
.Desc_1_1 = {
|
||||
.NumParameters = ARRAY_SIZE(root_params),
|
||||
.pParameters = root_params,
|
||||
.NumStaticSamplers = ARRAY_SIZE(samplers),
|
||||
.pStaticSamplers = samplers,
|
||||
.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE,
|
||||
},
|
||||
{
|
||||
.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS,
|
||||
.Constants = {
|
||||
.ShaderRegister = 0,
|
||||
.RegisterSpace = 0,
|
||||
.Num32BitValues = 1,
|
||||
},
|
||||
.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL,
|
||||
}
|
||||
};
|
||||
|
||||
uint32_t samples = key->resolve_mode == dzn_blit_resolve_none ?
|
||||
@@ -627,9 +625,25 @@ dzn_meta_blit_create(struct dzn_device *device, const struct dzn_meta_blit_key *
|
||||
.sampler_dim = key->sampler_dim,
|
||||
.src_is_array = key->src_is_array,
|
||||
.resolve_mode = key->resolve_mode,
|
||||
.stencil_fallback = key->loc == FRAG_RESULT_STENCIL && key->stencil_bit != 0xf,
|
||||
.padding = 0,
|
||||
};
|
||||
|
||||
D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {
|
||||
.Version = D3D_ROOT_SIGNATURE_VERSION_1_1,
|
||||
.Desc_1_1 = {
|
||||
.NumParameters = ARRAY_SIZE(root_params),
|
||||
.pParameters = root_params,
|
||||
.NumStaticSamplers = ARRAY_SIZE(samplers),
|
||||
.pStaticSamplers = samplers,
|
||||
.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE,
|
||||
},
|
||||
};
|
||||
|
||||
/* Don't need fs constants unless we're doing the stencil fallback */
|
||||
if (!blit_fs_info.stencil_fallback)
|
||||
root_sig_desc.Desc_1_1.NumParameters--;
|
||||
|
||||
blit->root_sig = dzn_device_create_root_sig(device, &root_sig_desc);
|
||||
if (!blit->root_sig) {
|
||||
dzn_meta_blit_destroy(device, blit);
|
||||
@@ -675,7 +689,7 @@ dzn_meta_blit_create(struct dzn_device *device, const struct dzn_meta_blit_key *
|
||||
} else {
|
||||
assert(key->loc == FRAG_RESULT_STENCIL);
|
||||
desc.DepthStencilState.StencilEnable = true;
|
||||
desc.DepthStencilState.StencilWriteMask = 0xff;
|
||||
desc.DepthStencilState.StencilWriteMask = key->stencil_bit == 0xf ? 0xff : (1 << key->stencil_bit);
|
||||
desc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_REPLACE;
|
||||
desc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_REPLACE;
|
||||
desc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_REPLACE;
|
||||
|
@@ -650,11 +650,13 @@ dzn_nir_blit_fs(const struct dzn_nir_blit_info *info)
|
||||
|
||||
uint32_t out_comps =
|
||||
(info->loc == FRAG_RESULT_DEPTH || info->loc == FRAG_RESULT_STENCIL) ? 1 : 4;
|
||||
nir_variable *out =
|
||||
nir_variable_create(b.shader, nir_var_shader_out,
|
||||
glsl_vector_type(info->out_type, out_comps),
|
||||
"out");
|
||||
out->data.location = info->loc;
|
||||
nir_variable *out = NULL;
|
||||
if (!info->stencil_fallback) {
|
||||
out = nir_variable_create(b.shader, nir_var_shader_out,
|
||||
glsl_vector_type(info->out_type, out_comps),
|
||||
"out");
|
||||
out->data.location = info->loc;
|
||||
}
|
||||
|
||||
nir_def *res = NULL;
|
||||
|
||||
@@ -771,7 +773,16 @@ dzn_nir_blit_fs(const struct dzn_nir_blit_info *info)
|
||||
res = &tex->def;
|
||||
}
|
||||
|
||||
nir_store_var(&b, out, nir_trim_vector(&b, res, out_comps), 0xf);
|
||||
if (info->stencil_fallback) {
|
||||
nir_def *mask_desc =
|
||||
dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "mask", 0);
|
||||
nir_def *mask = nir_load_ubo(&b, 1, 32, mask_desc, nir_imm_int(&b, 0),
|
||||
.align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0);
|
||||
nir_def *fail = nir_ieq_imm(&b, nir_iand(&b, nir_channel(&b, res, 0), mask), 0);
|
||||
nir_discard_if(&b, fail);
|
||||
} else {
|
||||
nir_store_var(&b, out, nir_trim_vector(&b, res, out_comps), 0xf);
|
||||
}
|
||||
|
||||
return b.shader;
|
||||
}
|
||||
|
@@ -153,7 +153,8 @@ struct dzn_nir_blit_info {
|
||||
uint32_t sampler_dim : 4;
|
||||
uint32_t src_is_array : 1;
|
||||
uint32_t resolve_mode : 3;
|
||||
uint32_t padding : 10;
|
||||
uint32_t stencil_fallback : 1;
|
||||
uint32_t padding : 9;
|
||||
};
|
||||
const uint32_t hash_key;
|
||||
};
|
||||
|
@@ -159,7 +159,8 @@ struct dzn_meta_blit_key {
|
||||
uint32_t src_is_array : 1;
|
||||
uint32_t resolve_mode : 3;
|
||||
uint32_t linear_filter : 1;
|
||||
uint32_t padding : 9;
|
||||
uint32_t stencil_bit : 4;
|
||||
uint32_t padding : 5;
|
||||
};
|
||||
const uint64_t u64;
|
||||
};
|
||||
|
Reference in New Issue
Block a user