nvk: VK_EXT_color_write_enable

A write mask based on the pipeline creation input is stored in scratch. A
second, similar mask is stored for the dynamic color_write_enable state. The
two can be updated independently and are combined in an MME macro before use.
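
For context, the application-facing entry point this wires up is
vkCmdSetColorWriteEnableEXT. A minimal sketch of client usage (the attachment
count and enable values here are illustrative):

   #include <vulkan/vulkan.h>

   /* Keep writes to attachment 0 enabled but mask off attachment 1.
    * Requires the colorWriteEnable feature and a pipeline created with
    * VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT. */
   static void
   set_color_writes(VkCommandBuffer cmd)
   {
      const VkBool32 enables[2] = { VK_TRUE, VK_FALSE };
      vkCmdSetColorWriteEnableEXT(cmd, 2, enables);
   }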

Each attachment has a mask for rgba. The max number of attachments is 8, so
we can fit the write mask in a single 32-bit scratch register.

color_write_enable is a single bit per attachment. To make it easier to
combine with the write mask, it is stored in scratch with separate rgba bits
per attachment.

The layout of both scratch values is:
Attachment index 88887777666655554444333322221111
Component        abgrabgrabgrabgrabgrabgrabgrabgr
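
A CPU-side sketch of how two such values are built and combined under this
layout (function names here are illustrative, not the driver's):

   #include <stdbool.h>
   #include <stdint.h>

   /* Pack one 4-bit rgba mask per attachment; attachment a occupies
    * bits [4a, 4a+3]. */
   static uint32_t
   pack_pipeline_mask(const uint8_t rgba_masks[8], uint32_t att_count)
   {
      uint32_t mask = 0;
      for (uint32_t a = 0; a < att_count; a++)
         mask |= (uint32_t)(rgba_masks[a] & 0xf) << (4 * a);
      return mask;
   }

   /* Replicate each attachment's single enable bit across its rgba
    * nibble so the two values can simply be ANDed together. */
   static uint32_t
   pack_dynamic_mask(const bool enables[8], uint32_t att_count)
   {
      uint32_t mask = 0;
      for (uint32_t a = 0; a < att_count; a++)
         if (enables[a])
            mask |= 0xfu << (4 * a);
      return mask;
   }

The effective per-attachment write mask is then
pack_pipeline_mask(...) & pack_dynamic_mask(...), which is exactly the AND
the MME macro performs.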

dEQP-VK.pipeline.monolithic.color_write_enable.*

Test run totals:
  Passed:        576/576 (100.0%)
  Failed:        0/576 (0.0%)
  Not supported: 0/576 (0.0%)
  Warnings:      0/576 (0.0%)
  Waived:        0/576 (0.0%)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26211>
Author: Thomas H.P. Andersen
Date: 2023-11-19 20:07:00 +01:00
Committed-by: Marge Bot
Parent: f217d267d3
Commit: afbaeee358

6 changed files with 100 additions and 20 deletions

@@ -548,7 +548,7 @@ Khronos extensions that are not part of any Vulkan version:
   VK_EXT_border_color_swizzle            DONE (anv, hasvk, lvp, nvk, radv/gfx10+, tu, v3dv, vn)
   VK_EXT_buffer_device_address           DONE (anv/gen8+, hasvk, nvk, radv)
   VK_EXT_calibrated_timestamps           DONE (anv, hasvk, lvp, radv, vn)
-  VK_EXT_color_write_enable              DONE (anv, hasvk, lvp, radv, tu, v3dv, vn)
+  VK_EXT_color_write_enable              DONE (anv, hasvk, lvp, nvk, radv, tu, v3dv, vn)
   VK_EXT_conditional_rendering           DONE (anv, hasvk, lvp, nvk, radv, tu, vn)
   VK_EXT_conservative_rasterization      DONE (anv/gen9+, radv, vn)
   VK_EXT_custom_border_color             DONE (anv, hasvk, lvp, nvk, panvk, radv, tu, v3dv, vn)

@@ -1519,14 +1519,72 @@ vk_to_nv9097_logic_op(VkLogicOp vk_op)
    return nv9097_op;
 }
 
+void
+nvk_mme_set_write_mask(struct mme_builder *b)
+{
+   struct mme_value count = mme_load(b);
+   struct mme_value pipeline = nvk_mme_load_scratch(b, WRITE_MASK_PIPELINE);
+   struct mme_value dynamic = nvk_mme_load_scratch(b, WRITE_MASK_DYN);
+
+   /* dynamic and pipeline are both bit fields:
+    *
+    *    attachment index 88887777666655554444333322221111
+    *    component        abgrabgrabgrabgrabgrabgrabgrabgr
+    */
+   struct mme_value mask = mme_and(b, pipeline, dynamic);
+   mme_free_reg(b, pipeline);
+   mme_free_reg(b, dynamic);
+
+   struct mme_value common_mask = mme_mov(b, mme_imm(1));
+   struct mme_value first = mme_and(b, mask, mme_imm(BITFIELD_RANGE(0, 4)));
+   struct mme_value i = mme_mov(b, mme_zero());
+
+   mme_while(b, ine, i, count) {
+      /* We call NV9097_SET_CT_WRITE per attachment.  It needs a value as:
+       *
+       *    0x0000 0000 0000 0000 000a 000b 000g 000r
+       *
+       * So for i=0 a mask of
+       *
+       *    0x0000 0000 0000 0000 0000 0000 0000 1111
+       *
+       * becomes
+       *
+       *    0x0000 0000 0000 0000 0001 0001 0001 0001
+       */
+      struct mme_value val = mme_merge(b, mme_zero(), mask, 0, 1, 0);
+      mme_merge_to(b, val, val, mask, 4, 1, 1);
+      mme_merge_to(b, val, val, mask, 8, 1, 2);
+      mme_merge_to(b, val, val, mask, 12, 1, 3);
+
+      mme_mthd_arr(b, NV9097_SET_CT_WRITE(0), i);
+      mme_emit(b, val);
+      mme_free_reg(b, val);
+
+      /* Check if all masks are common */
+      struct mme_value temp = mme_and(b, mask, mme_imm(BITFIELD_RANGE(0, 4)));
+      mme_if(b, ine, first, temp) {
+         mme_mov_to(b, common_mask, mme_zero());
+      }
+      mme_free_reg(b, temp);
+
+      mme_srl_to(b, mask, mask, mme_imm(4));
+      mme_add_to(b, i, i, mme_imm(1));
+   }
+
+   mme_mthd(b, NV9097_SET_SINGLE_CT_WRITE_CONTROL);
+   mme_emit(b, common_mask);
+}
+
 static void
 nvk_flush_cb_state(struct nvk_cmd_buffer *cmd)
 {
-   struct nv_push *p = nvk_cmd_buffer_push(cmd, 9);
    const struct vk_dynamic_graphics_state *dyn =
       &cmd->vk.dynamic_graphics_state;
+   struct nv_push *p = nvk_cmd_buffer_push(cmd, 9 + 4 * NVK_MAX_RTS);
 
    if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE))
       P_IMMD(p, NV9097, SET_LOGIC_OP, dyn->cb.logic_op_enable);
@@ -1535,7 +1593,25 @@ nvk_flush_cb_state(struct nvk_cmd_buffer *cmd)
       P_IMMD(p, NV9097, SET_LOGIC_OP_FUNC, func);
    }
 
    /* MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES */
+   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
+      /* We intentionally ignore cb.attachment_count here and just fill out
+       * whatever is in the mask.  This ensures that what we set in the MME
+       * scratch reg exactly matches the CPU-side state.
+       *
+       * If the attachment count is wrong (or changes), that will show up in
+       * the pipeline and the NVK_MME_SET_WRITE_MASK macro will be invoked
+       * again with the correct write mask.
+       */
+      uint32_t color_write_enables = 0x0;
+      u_foreach_bit(a, dyn->cb.color_write_enables)
+         color_write_enables |= 0xf << (4 * a);
+
+      P_IMMD(p, NV9097, SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_WRITE_MASK_DYN),
+             color_write_enables);
+
+      P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_SET_WRITE_MASK));
+      P_INLINE_DATA(p, dyn->cb.attachment_count);
+   }
+
    if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
       P_MTHD(p, NV9097, SET_BLEND_CONST_RED);

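The mme_merge chain in nvk_mme_set_write_mask above spreads one rgba nibble
into the 000a000b000g000r layout that NV9097_SET_CT_WRITE expects. The same
transform in plain C, as a sketch (not driver code):

   #include <stdint.h>

   /* Move bits 0..3 of the current attachment's nibble to bits 0, 4,
    * 8 and 12, one bit per mme_merge step. */
   static uint32_t
   expand_ct_write_nibble(uint32_t mask)
   {
      uint32_t val = (mask >> 0) & 0x1;  /* r -> bit 0  */
      val |= ((mask >> 1) & 0x1) << 4;   /* g -> bit 4  */
      val |= ((mask >> 2) & 0x1) << 8;   /* b -> bit 8  */
      val |= ((mask >> 3) & 0x1) << 12;  /* a -> bit 12 */
      return val;
   }

For the all-enabled nibble 0xf this returns 0x1111, matching the example in
the macro's comment.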
@@ -6,6 +6,7 @@
#include "nvk_cmd_buffer.h"
#include "nvk_device.h"
#include "nvk_mme.h"
#include "nvk_physical_device.h"
#include "nvk_shader.h"
@@ -172,37 +173,31 @@ emit_pipeline_ct_write_state(struct nv_push *p,
                              const struct vk_color_blend_state *cb,
                              const struct vk_render_pass_state *rp)
 {
-   uint32_t att_write_masks[8] = {};
+   uint32_t write_mask = 0;
    uint32_t att_count = 0;
 
    if (rp != NULL) {
       att_count = rp->color_attachment_count;
       for (uint32_t a = 0; a < rp->color_attachment_count; a++) {
          VkFormat att_format = rp->color_attachment_formats[a];
-         att_write_masks[a] = att_format == VK_FORMAT_UNDEFINED ? 0 : 0xf;
+         if (att_format != VK_FORMAT_UNDEFINED)
+            write_mask |= 0xf << (4 * a);
       }
    }
 
    if (cb != NULL) {
       assert(cb->attachment_count == att_count);
+      uint32_t wm = 0;
       for (uint32_t a = 0; a < cb->attachment_count; a++)
-         att_write_masks[a] &= cb->attachments[a].write_mask;
+         wm |= cb->attachments[a].write_mask << (a * 4);
+      write_mask &= wm;
    }
 
-   bool indep_color_masks = true;
-   for (uint32_t a = 0; a < att_count; a++) {
-      P_IMMD(p, NV9097, SET_CT_WRITE(a), {
-         .r_enable = (att_write_masks[a] & BITFIELD_BIT(0)) != 0,
-         .g_enable = (att_write_masks[a] & BITFIELD_BIT(1)) != 0,
-         .b_enable = (att_write_masks[a] & BITFIELD_BIT(2)) != 0,
-         .a_enable = (att_write_masks[a] & BITFIELD_BIT(3)) != 0,
-      });
-
-      if (att_write_masks[a] != att_write_masks[0])
-         indep_color_masks = false;
-   }
-
-   P_IMMD(p, NV9097, SET_SINGLE_CT_WRITE_CONTROL, indep_color_masks);
+   P_IMMD(p, NV9097, SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_WRITE_MASK_PIPELINE),
+          write_mask);
+
+   P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_SET_WRITE_MASK));
+   P_INLINE_DATA(p, att_count);
 }
 
 static void

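The common_mask bookkeeping that replaced the old indep_color_masks loop can
be summarized by this C sketch of the check the macro performs (illustrative
only):

   #include <stdbool.h>
   #include <stdint.h>

   /* True if every attachment's rgba nibble equals the first one, in
    * which case NV9097_SET_SINGLE_CT_WRITE_CONTROL can be enabled. */
   static bool
   all_ct_masks_common(uint32_t mask, uint32_t att_count)
   {
      const uint32_t first = mask & 0xf;
      for (uint32_t i = 0; i < att_count; i++) {
         if ((mask & 0xf) != first)
            return false;
         mask >>= 4;
      }
      return true;
   }
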
@@ -21,6 +21,7 @@ static const nvk_mme_builder_func mme_builders[NVK_MME_COUNT] = {
    [NVK_MME_XFB_COUNTER_LOAD] = nvk_mme_xfb_counter_load,
    [NVK_MME_XFB_DRAW_INDIRECT] = nvk_mme_xfb_draw_indirect,
    [NVK_MME_SET_PRIV_REG] = nvk_mme_set_priv_reg,
+   [NVK_MME_SET_WRITE_MASK] = nvk_mme_set_write_mask,
 };
 
 uint32_t *

@@ -24,6 +24,7 @@ enum nvk_mme {
    NVK_MME_XFB_COUNTER_LOAD,
    NVK_MME_XFB_DRAW_INDIRECT,
    NVK_MME_SET_PRIV_REG,
+   NVK_MME_SET_WRITE_MASK,
 
    NVK_MME_COUNT,
 };
@@ -35,6 +36,8 @@ enum nvk_mme_scratch {
    NVK_MME_SCRATCH_DRAW_PAD_DW,
    NVK_MME_SCRATCH_DRAW_IDX,
    NVK_MME_SCRATCH_VIEW_MASK,
+   NVK_MME_SCRATCH_WRITE_MASK_DYN,
+   NVK_MME_SCRATCH_WRITE_MASK_PIPELINE,
 
    /* Must be at the end */
    NVK_MME_NUM_SCRATCH,
@@ -122,5 +125,6 @@ void nvk_mme_copy_queries(struct mme_builder *b);
 void nvk_mme_xfb_counter_load(struct mme_builder *b);
 void nvk_mme_xfb_draw_indirect(struct mme_builder *b);
 void nvk_mme_set_priv_reg(struct mme_builder *b);
+void nvk_mme_set_write_mask(struct mme_builder *b);
 
 #endif /* NVK_MME_H */

@@ -139,6 +139,7 @@ nvk_get_device_extensions(const struct nv_device_info *info,
       .EXT_border_color_swizzle = true,
       .EXT_buffer_device_address = true,
       .EXT_conditional_rendering = true,
+      .EXT_color_write_enable = true,
       .EXT_custom_border_color = true,
       .EXT_depth_bias_control = true,
       .EXT_depth_clip_control = true,
@@ -348,6 +349,9 @@ nvk_get_device_features(const struct nv_device_info *info,
       /* VK_EXT_buffer_device_address */
       .bufferDeviceAddressCaptureReplayEXT = true,
 
+      /* VK_EXT_color_write_enable */
+      .colorWriteEnable = true,
+
       /* VK_EXT_conditional_rendering */
       .conditionalRendering = true,
       .inheritedConditionalRendering = true,
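
On the application side, the feature bit exposed here is discovered by
chaining VkPhysicalDeviceColorWriteEnableFeaturesEXT into
vkGetPhysicalDeviceFeatures2; a minimal sketch:

   #include <stdbool.h>
   #include <vulkan/vulkan.h>

   /* Returns true if the device advertises colorWriteEnable. */
   static bool
   has_color_write_enable(VkPhysicalDevice pdev)
   {
      VkPhysicalDeviceColorWriteEnableFeaturesEXT cwe = {
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT,
      };
      VkPhysicalDeviceFeatures2 features = {
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
         .pNext = &cwe,
      };
      vkGetPhysicalDeviceFeatures2(pdev, &features);
      return cwe.colorWriteEnable == VK_TRUE;
   }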