nvk: VK_EXT_color_write_enable

A write mask based on the pipeline creation input is stored in scratch. A
second, similar mask is stored for the dynamic color_write_enable state. The
two can be updated independently and are combined in an MME macro before use.
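
For context, the application-facing entry point this wires up is
vkCmdSetColorWriteEnableEXT. A minimal sketch of client usage (the attachment
count and enable values here are illustrative):

   #include <vulkan/vulkan.h>

   /* Keep writes to attachment 0 enabled but mask off attachment 1.
    * Requires the colorWriteEnable feature and a pipeline created with
    * VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT. */
   static void
   set_color_writes(VkCommandBuffer cmd)
   {
      const VkBool32 enables[2] = { VK_TRUE, VK_FALSE };
      vkCmdSetColorWriteEnableEXT(cmd, 2, enables);
   }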

Each attachment has a mask for rgba. The max number of attachments is 8, so
we can fit the write mask in a single 32-bit scratch register.

color_write_enable is a single bit per attachment. To make it easier to
combine with the write mask, it is stored in scratch with separate rgba bits
per attachment.

The layout of both scratch values is:
Attachment index 88887777666655554444333322221111
Component        abgrabgrabgrabgrabgrabgrabgrabgr
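
A CPU-side sketch of how two such values are built and combined under this
layout (function names here are illustrative, not the driver's):

   #include <stdbool.h>
   #include <stdint.h>

   /* Pack one 4-bit rgba mask per attachment; attachment a occupies
    * bits [4a, 4a+3]. */
   static uint32_t
   pack_pipeline_mask(const uint8_t rgba_masks[8], uint32_t att_count)
   {
      uint32_t mask = 0;
      for (uint32_t a = 0; a < att_count; a++)
         mask |= (uint32_t)(rgba_masks[a] & 0xf) << (4 * a);
      return mask;
   }

   /* Replicate each attachment's single enable bit across its rgba
    * nibble so the two values can simply be ANDed together. */
   static uint32_t
   pack_dynamic_mask(const bool enables[8], uint32_t att_count)
   {
      uint32_t mask = 0;
      for (uint32_t a = 0; a < att_count; a++)
         if (enables[a])
            mask |= 0xfu << (4 * a);
      return mask;
   }

The effective per-attachment write mask is then
pack_pipeline_mask(...) & pack_dynamic_mask(...), which is exactly the AND
the MME macro performs.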

dEQP-VK.pipeline.monolithic.color_write_enable.*

Test run totals:
  Passed:        576/576 (100.0%)
  Failed:        0/576 (0.0%)
  Not supported: 0/576 (0.0%)
  Warnings:      0/576 (0.0%)
  Waived:        0/576 (0.0%)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26211>
Author: Thomas H.P. Andersen
Date: 2023-11-19 20:07:00 +01:00
Committed-by: Marge Bot
Parent: f217d267d3
Commit: afbaeee358

6 changed files with 100 additions and 20 deletions

@@ -548,7 +548,7 @@ Khronos extensions that are not part of any Vulkan version:
   VK_EXT_border_color_swizzle            DONE (anv, hasvk, lvp, nvk, radv/gfx10+, tu, v3dv, vn)
   VK_EXT_buffer_device_address           DONE (anv/gen8+, hasvk, nvk, radv)
   VK_EXT_calibrated_timestamps           DONE (anv, hasvk, lvp, radv, vn)
-  VK_EXT_color_write_enable              DONE (anv, hasvk, lvp, radv, tu, v3dv, vn)
+  VK_EXT_color_write_enable              DONE (anv, hasvk, lvp, nvk, radv, tu, v3dv, vn)
   VK_EXT_conditional_rendering           DONE (anv, hasvk, lvp, nvk, radv, tu, vn)
   VK_EXT_conservative_rasterization      DONE (anv/gen9+, radv, vn)
   VK_EXT_custom_border_color             DONE (anv, hasvk, lvp, nvk, panvk, radv, tu, v3dv, vn)

@@ -1519,14 +1519,72 @@ vk_to_nv9097_logic_op(VkLogicOp vk_op)
    return nv9097_op;
 }
 
+void
+nvk_mme_set_write_mask(struct mme_builder *b)
+{
+   struct mme_value count = mme_load(b);
+   struct mme_value pipeline = nvk_mme_load_scratch(b, WRITE_MASK_PIPELINE);
+   struct mme_value dynamic = nvk_mme_load_scratch(b, WRITE_MASK_DYN);
+
+   /* dynamic and pipeline are both bit fields:
+    *
+    *    attachment index 88887777666655554444333322221111
+    *    component        abgrabgrabgrabgrabgrabgrabgrabgr
+    */
+   struct mme_value mask = mme_and(b, pipeline, dynamic);
+   mme_free_reg(b, pipeline);
+   mme_free_reg(b, dynamic);
+
+   struct mme_value common_mask = mme_mov(b, mme_imm(1));
+   struct mme_value first = mme_and(b, mask, mme_imm(BITFIELD_RANGE(0, 4)));
+   struct mme_value i = mme_mov(b, mme_zero());
+
+   mme_while(b, ine, i, count) {
+      /* We call NV9097_SET_CT_WRITE per attachment.  It needs a value as:
+       *
+       *    0x0000 0000 0000 0000 000a 000b 000g 000r
+       *
+       * So for i=0 a mask of
+       *
+       *    0x0000 0000 0000 0000 0000 0000 0000 1111
+       *
+       * becomes
+       *
+       *    0x0000 0000 0000 0000 0001 0001 0001 0001
+       */
+      struct mme_value val = mme_merge(b, mme_zero(), mask, 0, 1, 0);
+      mme_merge_to(b, val, val, mask, 4, 1, 1);
+      mme_merge_to(b, val, val, mask, 8, 1, 2);
+      mme_merge_to(b, val, val, mask, 12, 1, 3);
+
+      mme_mthd_arr(b, NV9097_SET_CT_WRITE(0), i);
+      mme_emit(b, val);
+      mme_free_reg(b, val);
+
+      /* Check if all masks are common */
+      struct mme_value temp = mme_and(b, mask, mme_imm(BITFIELD_RANGE(0, 4)));
+      mme_if(b, ine, first, temp) {
+         mme_mov_to(b, common_mask, mme_zero());
+      }
+      mme_free_reg(b, temp);
+
+      mme_srl_to(b, mask, mask, mme_imm(4));
+      mme_add_to(b, i, i, mme_imm(1));
+   }
+
+   mme_mthd(b, NV9097_SET_SINGLE_CT_WRITE_CONTROL);
+   mme_emit(b, common_mask);
+}
+
 static void
 nvk_flush_cb_state(struct nvk_cmd_buffer *cmd)
 {
-   struct nv_push *p = nvk_cmd_buffer_push(cmd, 9);
    const struct vk_dynamic_graphics_state *dyn =
       &cmd->vk.dynamic_graphics_state;
+   struct nv_push *p = nvk_cmd_buffer_push(cmd, 9 + 4 * NVK_MAX_RTS);
 
    if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE))
       P_IMMD(p, NV9097, SET_LOGIC_OP, dyn->cb.logic_op_enable);
@@ -1535,7 +1593,25 @@ nvk_flush_cb_state(struct nvk_cmd_buffer *cmd)
       P_IMMD(p, NV9097, SET_LOGIC_OP_FUNC, func);
    }
 
    /* MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES */
+   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
+      /* We intentionally ignore cb.attachment_count here and just fill out
+       * whatever is in the mask.  This ensures that what we set in the MME
+       * scratch reg exactly matches the CPU-side state.
+       *
+       * If the attachment count is wrong (or changes), that will show up in
+       * the pipeline and the NVK_MME_SET_WRITE_MASK macro will be invoked
+       * again with the correct write mask.
+       */
+      uint32_t color_write_enables = 0x0;
+      u_foreach_bit(a, dyn->cb.color_write_enables)
+         color_write_enables |= 0xf << (4 * a);
+
+      P_IMMD(p, NV9097, SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_WRITE_MASK_DYN),
+             color_write_enables);
+
+      P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_SET_WRITE_MASK));
+      P_INLINE_DATA(p, dyn->cb.attachment_count);
+   }
+
    if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
       P_MTHD(p, NV9097, SET_BLEND_CONST_RED);

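The mme_merge chain in nvk_mme_set_write_mask above spreads one rgba nibble
into the 000a000b000g000r layout that NV9097_SET_CT_WRITE expects. The same
transform in plain C, as a sketch (not driver code):

   #include <stdint.h>

   /* Move bits 0..3 of the current attachment's nibble to bits 0, 4,
    * 8 and 12, one bit per mme_merge step. */
   static uint32_t
   expand_ct_write_nibble(uint32_t mask)
   {
      uint32_t val = (mask >> 0) & 0x1;  /* r -> bit 0  */
      val |= ((mask >> 1) & 0x1) << 4;   /* g -> bit 4  */
      val |= ((mask >> 2) & 0x1) << 8;   /* b -> bit 8  */
      val |= ((mask >> 3) & 0x1) << 12;  /* a -> bit 12 */
      return val;
   }

For the all-enabled nibble 0xf this returns 0x1111, matching the example in
the macro's comment.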
@@ -6,6 +6,7 @@
#include "nvk_cmd_buffer.h"
#include "nvk_device.h"
#include "nvk_mme.h"
#include "nvk_physical_device.h"
#include "nvk_shader.h"
@@ -172,37 +173,31 @@ emit_pipeline_ct_write_state(struct nv_push *p,
                              const struct vk_color_blend_state *cb,
                              const struct vk_render_pass_state *rp)
 {
-   uint32_t att_write_masks[8] = {};
+   uint32_t write_mask = 0;
    uint32_t att_count = 0;
 
    if (rp != NULL) {
       att_count = rp->color_attachment_count;
       for (uint32_t a = 0; a < rp->color_attachment_count; a++) {
          VkFormat att_format = rp->color_attachment_formats[a];
-         att_write_masks[a] = att_format == VK_FORMAT_UNDEFINED ? 0 : 0xf;
+         if (att_format != VK_FORMAT_UNDEFINED)
+            write_mask |= 0xf << (4 * a);
       }
    }
 
    if (cb != NULL) {
       assert(cb->attachment_count == att_count);
+      uint32_t wm = 0;
       for (uint32_t a = 0; a < cb->attachment_count; a++)
-         att_write_masks[a] &= cb->attachments[a].write_mask;
+         wm |= cb->attachments[a].write_mask << (a * 4);
+      write_mask &= wm;
    }
 
-   bool indep_color_masks = true;
-   for (uint32_t a = 0; a < att_count; a++) {
-      P_IMMD(p, NV9097, SET_CT_WRITE(a), {
-         .r_enable = (att_write_masks[a] & BITFIELD_BIT(0)) != 0,
-         .g_enable = (att_write_masks[a] & BITFIELD_BIT(1)) != 0,
-         .b_enable = (att_write_masks[a] & BITFIELD_BIT(2)) != 0,
-         .a_enable = (att_write_masks[a] & BITFIELD_BIT(3)) != 0,
-      });
-
-      if (att_write_masks[a] != att_write_masks[0])
-         indep_color_masks = false;
-   }
-
-   P_IMMD(p, NV9097, SET_SINGLE_CT_WRITE_CONTROL, indep_color_masks);
+   P_IMMD(p, NV9097, SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_WRITE_MASK_PIPELINE),
+          write_mask);
+
+   P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_SET_WRITE_MASK));
+   P_INLINE_DATA(p, att_count);
 }
 
 static void

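The common_mask bookkeeping that replaced the old indep_color_masks loop can
be summarized by this C sketch of the check the macro performs (illustrative
only):

   #include <stdbool.h>
   #include <stdint.h>

   /* True if every attachment's rgba nibble equals the first one, in
    * which case NV9097_SET_SINGLE_CT_WRITE_CONTROL can be enabled. */
   static bool
   all_ct_masks_common(uint32_t mask, uint32_t att_count)
   {
      const uint32_t first = mask & 0xf;
      for (uint32_t i = 0; i < att_count; i++) {
         if ((mask & 0xf) != first)
            return false;
         mask >>= 4;
      }
      return true;
   }
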
@@ -21,6 +21,7 @@ static const nvk_mme_builder_func mme_builders[NVK_MME_COUNT] = {
    [NVK_MME_XFB_COUNTER_LOAD] = nvk_mme_xfb_counter_load,
    [NVK_MME_XFB_DRAW_INDIRECT] = nvk_mme_xfb_draw_indirect,
    [NVK_MME_SET_PRIV_REG] = nvk_mme_set_priv_reg,
+   [NVK_MME_SET_WRITE_MASK] = nvk_mme_set_write_mask,
 };
 
 uint32_t *

@@ -24,6 +24,7 @@ enum nvk_mme {
    NVK_MME_XFB_COUNTER_LOAD,
    NVK_MME_XFB_DRAW_INDIRECT,
    NVK_MME_SET_PRIV_REG,
+   NVK_MME_SET_WRITE_MASK,
 
    NVK_MME_COUNT,
 };
@@ -35,6 +36,8 @@ enum nvk_mme_scratch {
    NVK_MME_SCRATCH_DRAW_PAD_DW,
    NVK_MME_SCRATCH_DRAW_IDX,
    NVK_MME_SCRATCH_VIEW_MASK,
+   NVK_MME_SCRATCH_WRITE_MASK_DYN,
+   NVK_MME_SCRATCH_WRITE_MASK_PIPELINE,
 
    /* Must be at the end */
    NVK_MME_NUM_SCRATCH,
@@ -122,5 +125,6 @@ void nvk_mme_copy_queries(struct mme_builder *b);
 void nvk_mme_xfb_counter_load(struct mme_builder *b);
 void nvk_mme_xfb_draw_indirect(struct mme_builder *b);
 void nvk_mme_set_priv_reg(struct mme_builder *b);
+void nvk_mme_set_write_mask(struct mme_builder *b);
 
 #endif /* NVK_MME_H */

@@ -139,6 +139,7 @@ nvk_get_device_extensions(const struct nv_device_info *info,
       .EXT_border_color_swizzle = true,
       .EXT_buffer_device_address = true,
       .EXT_conditional_rendering = true,
+      .EXT_color_write_enable = true,
       .EXT_custom_border_color = true,
       .EXT_depth_bias_control = true,
       .EXT_depth_clip_control = true,
@@ -348,6 +349,9 @@ nvk_get_device_features(const struct nv_device_info *info,
       /* VK_EXT_buffer_device_address */
       .bufferDeviceAddressCaptureReplayEXT = true,
 
+      /* VK_EXT_color_write_enable */
+      .colorWriteEnable = true,
+
       /* VK_EXT_conditional_rendering */
       .conditionalRendering = true,
       .inheritedConditionalRendering = true,
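
On the application side, the feature bit exposed here is discovered by
chaining VkPhysicalDeviceColorWriteEnableFeaturesEXT into
vkGetPhysicalDeviceFeatures2; a minimal sketch:

   #include <stdbool.h>
   #include <vulkan/vulkan.h>

   /* Returns true if the device advertises colorWriteEnable. */
   static bool
   has_color_write_enable(VkPhysicalDevice pdev)
   {
      VkPhysicalDeviceColorWriteEnableFeaturesEXT cwe = {
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT,
      };
      VkPhysicalDeviceFeatures2 features = {
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
         .pNext = &cwe,
      };
      vkGetPhysicalDeviceFeatures2(pdev, &features);
      return cwe.colorWriteEnable == VK_TRUE;
   }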