diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index c45ea64eaf1..f6478f39665 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -134,6 +134,7 @@ const struct radv_dynamic_state default_dynamic_state = { .provoking_vertex_mode = VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, .depth_clamp_enable = 0u, .color_write_mask = 0u, + .color_blend_enable = 0u, }; static void @@ -291,6 +292,8 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dy RADV_CMP_COPY(color_write_mask, RADV_DYNAMIC_COLOR_WRITE_MASK); + RADV_CMP_COPY(color_blend_enable, RADV_DYNAMIC_COLOR_BLEND_ENABLE); + #undef RADV_CMP_COPY cmd_buffer->state.dirty |= dest_mask; @@ -1912,6 +1915,13 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer) cmd_buffer->state.emitted_graphics_pipeline->ms.db_eqaa != pipeline->ms.db_eqaa) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_CONSERVATIVE_RAST_MODE; + if (!cmd_buffer->state.emitted_graphics_pipeline || + memcmp(cmd_buffer->state.emitted_graphics_pipeline->cb_blend_control, + pipeline->cb_blend_control, sizeof(pipeline->cb_blend_control)) || + memcmp(cmd_buffer->state.emitted_graphics_pipeline->sx_mrt_blend_opt, + pipeline->sx_mrt_blend_opt, sizeof(pipeline->sx_mrt_blend_opt))) + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE; + radeon_emit_array(cmd_buffer->cs, pipeline->base.cs.buf, pipeline->base.cs.cdw); if (pipeline->has_ngg_culling && @@ -3915,6 +3925,43 @@ radv_emit_sample_mask(struct radv_cmd_buffer *cmd_buffer) radeon_emit(cmd_buffer->cs, d->sample_mask | ((uint32_t)d->sample_mask << 16)); } +static void +radv_emit_color_blend_enable(struct radv_cmd_buffer *cmd_buffer) +{ + const struct radv_physical_device *pdevice = cmd_buffer->device->physical_device; + const struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; + const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; + unsigned 
cb_blend_control[MAX_RTS], sx_mrt_blend_opt[MAX_RTS]; + + for (unsigned i = 0; i < MAX_RTS; i++) { + bool blend_enable = (d->color_blend_enable >> (i * 4)) & 0xf; + + cb_blend_control[i] = pipeline->cb_blend_control[i]; + sx_mrt_blend_opt[i] = pipeline->sx_mrt_blend_opt[i]; + + if (blend_enable) { + cb_blend_control[i] |= S_028780_ENABLE(1); + } else if (pdevice->rad_info.has_rbplus) { + /* Make sure to keep RB+ blend optimizations disabled for dual source blending. */ + if (G_028760_COLOR_COMB_FCN(sx_mrt_blend_opt[i]) != V_028760_OPT_COMB_NONE && + G_028760_ALPHA_COMB_FCN(sx_mrt_blend_opt[i]) != V_028760_OPT_COMB_NONE) { + sx_mrt_blend_opt[i] &= C_028760_COLOR_COMB_FCN; + sx_mrt_blend_opt[i] &= C_028760_ALPHA_COMB_FCN; + sx_mrt_blend_opt[i] |= S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | + S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); + } + } + } + + radeon_set_context_reg_seq(cmd_buffer->cs, R_028780_CB_BLEND0_CONTROL, MAX_RTS); + radeon_emit_array(cmd_buffer->cs, cb_blend_control, MAX_RTS); + + if (pdevice->rad_info.has_rbplus) { + radeon_set_context_reg_seq(cmd_buffer->cs, R_028760_SX_MRT0_BLEND_OPT, MAX_RTS); + radeon_emit_array(cmd_buffer->cs, sx_mrt_blend_opt, MAX_RTS); + } +} + static void radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty) { @@ -4020,6 +4067,9 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer, bool pip if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLAMP_ENABLE) radv_emit_depth_clamp_enable(cmd_buffer); + if (states & RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE) + radv_emit_color_blend_enable(cmd_buffer); + cmd_buffer->state.dirty &= ~states; } @@ -6608,6 +6658,33 @@ radv_CmdSetColorWriteMaskEXT(VkCommandBuffer commandBuffer, uint32_t firstAttach state->dirty |= RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK; } +/* vkCmdSetColorBlendEnableEXT entry point: updates the 4-bit blend enable group of each + * attachment in [firstAttachment, firstAttachment + attachmentCount) and flags the state dirty. + * Attachments outside that range keep the value that was recorded before this call. + */ +VKAPI_ATTR void VKAPI_CALL +radv_CmdSetColorBlendEnableEXT(VkCommandBuffer commandBuffer, uint32_t firstAttachment, + uint32_t attachmentCount, const 
VkBool32* pColorBlendEnables) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + uint32_t color_blend_enable = state->dynamic.color_blend_enable; + + assert(firstAttachment + attachmentCount <= MAX_RTS); + + for (uint32_t i = 0; i < attachmentCount; i++) { + unsigned idx = firstAttachment + i; + + /* Clear this attachment's group first so a VK_FALSE entry disables blending. */ + color_blend_enable &= ~(0xfu << (idx * 4)); + color_blend_enable |= pColorBlendEnables[i] ? (0xfu << (idx * 4)) : 0; + } + + state->dynamic.color_blend_enable = color_blend_enable; + + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE; +} + VKAPI_ATTR void VKAPI_CALL radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, const VkCommandBuffer *pCmdBuffers) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 86b5c77ec06..5f6e8e98d30 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -55,8 +55,6 @@ struct radv_blend_state { uint32_t cb_target_mask; uint32_t cb_target_enabled_4bit; - uint32_t sx_mrt_blend_opt[8]; - uint32_t cb_blend_control[8]; uint32_t spi_shader_col_format; uint32_t col_format_is_int8; @@ -575,7 +573,11 @@ radv_pipeline_compute_spi_color_formats(const struct radv_graphics_pipeline *pip !(blend->cb_target_mask & (0xfu << (i * 4))))) { cf = V_028714_SPI_SHADER_ZERO; } else { - bool blend_enable = blend->blend_enable_4bit & (0xfu << (i * 4)); + /* Assume blend is enabled when the state is dynamic. This might select a suboptimal format + * in some situations but changing color export formats dynamically is hard. 
+ */ + bool blend_enable = (pipeline->dynamic_states & RADV_DYNAMIC_COLOR_BLEND_ENABLE) || + blend->blend_enable_4bit & (0xfu << (i * 4)); cf = radv_choose_spi_color_format(pipeline->base.device, fmt, blend_enable, blend->need_src_alpha & (1 << i)); @@ -710,8 +712,6 @@ radv_can_enable_dual_src(const struct vk_color_blend_attachment_state *att) bool eqRGB_minmax = eqRGB == VK_BLEND_OP_MIN || eqRGB == VK_BLEND_OP_MAX; bool eqA_minmax = eqA == VK_BLEND_OP_MIN || eqA == VK_BLEND_OP_MAX; - assert(att->blend_enable); - if (!eqRGB_minmax && (is_dual_src(srcRGB) || is_dual_src(dstRGB))) return true; if (!eqA_minmax && (is_dual_src(srcA) || is_dual_src(dstA))) @@ -751,9 +751,6 @@ radv_pipeline_init_blend_state(struct radv_graphics_pipeline *pipeline, VkBlendFactor srcA = state->cb->attachments[i].src_alpha_blend_factor; VkBlendFactor dstA = state->cb->attachments[i].dst_alpha_blend_factor; - blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | - S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); - if (!(pipeline->dynamic_states & RADV_DYNAMIC_COLOR_WRITE_MASK) && !state->cb->attachments[i].write_mask) continue; @@ -766,8 +763,9 @@ radv_pipeline_init_blend_state(struct radv_graphics_pipeline *pipeline, blend.cb_target_mask |= (unsigned)state->cb->attachments[i].write_mask << (4 * i); blend.cb_target_enabled_4bit |= 0xfu << (4 * i); - if (!state->cb->attachments[i].blend_enable) { - blend.cb_blend_control[i] = blend_cntl; + if (!(pipeline->dynamic_states & RADV_DYNAMIC_COLOR_BLEND_ENABLE) && + !state->cb->attachments[i].blend_enable) { + pipeline->cb_blend_control[i] = blend_cntl; continue; } @@ -820,12 +818,11 @@ radv_pipeline_init_blend_state(struct radv_graphics_pipeline *pipeline, dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; /* Set the final value. 
*/ - blend.sx_mrt_blend_opt[i] = + pipeline->sx_mrt_blend_opt[i] = S_028760_COLOR_SRC_OPT(srcRGB_opt) | S_028760_COLOR_DST_OPT(dstRGB_opt) | S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) | S_028760_ALPHA_SRC_OPT(srcA_opt) | S_028760_ALPHA_DST_OPT(dstA_opt) | S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA)); - blend_cntl |= S_028780_ENABLE(1); blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(gfx_level, srcRGB)); @@ -836,7 +833,7 @@ radv_pipeline_init_blend_state(struct radv_graphics_pipeline *pipeline, blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(gfx_level, srcA)); blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(gfx_level, dstA)); } - blend.cb_blend_control[i] = blend_cntl; + pipeline->cb_blend_control[i] = blend_cntl; blend.blend_enable_4bit |= 0xfu << (i * 4); @@ -847,19 +844,14 @@ radv_pipeline_init_blend_state(struct radv_graphics_pipeline *pipeline, dstRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA) blend.need_src_alpha |= 1 << i; } - for (i = state->cb->attachment_count; i < 8; i++) { - blend.cb_blend_control[i] = 0; - blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | - S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); - } } if (device->physical_device->rad_info.has_rbplus) { /* Disable RB+ blend optimizations for dual source blending. */ if (blend.mrt0_is_dual_src) { for (i = 0; i < 8; i++) { - blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) | - S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE); + pipeline->sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) | + S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE); } } @@ -993,7 +985,7 @@ radv_pipeline_out_of_order_rast(struct radv_graphics_pipeline *pipeline, return false; /* Be conservative if a logic operation is enabled with color buffers. 
*/ - if (colormask && + if (colormask && (pipeline->dynamic_states & RADV_DYNAMIC_COLOR_BLEND_ENABLE) && ((pipeline->dynamic_states & RADV_DYNAMIC_LOGIC_OP_ENABLE) || state->cb->logic_op_enable)) return false; @@ -1337,6 +1329,8 @@ radv_dynamic_state_mask(VkDynamicState state) return RADV_DYNAMIC_DEPTH_CLAMP_ENABLE; case VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT: return RADV_DYNAMIC_COLOR_WRITE_MASK; + case VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT: + return RADV_DYNAMIC_COLOR_BLEND_ENABLE; default: unreachable("Unhandled dynamic state"); } @@ -1347,9 +1341,12 @@ radv_pipeline_is_blend_enabled(const struct radv_graphics_pipeline *pipeline, const struct vk_color_blend_state *cb) { if (cb) { + if (pipeline->dynamic_states & (RADV_DYNAMIC_COLOR_WRITE_MASK | + RADV_DYNAMIC_COLOR_BLEND_ENABLE)) + return true; + for (uint32_t i = 0; i < cb->attachment_count; i++) { - if (((pipeline->dynamic_states & RADV_DYNAMIC_COLOR_WRITE_MASK) || - cb->attachments[i].write_mask) && cb->attachments[i].blend_enable) + if (cb->attachments[i].write_mask && cb->attachments[i].blend_enable) return true; } } @@ -1924,6 +1921,15 @@ radv_pipeline_init_dynamic_state(struct radv_graphics_pipeline *pipeline, } } + if (radv_pipeline_has_color_attachments(state->rp) && states & RADV_DYNAMIC_COLOR_BLEND_ENABLE) { + for (unsigned i = 0; i < state->cb->attachment_count; i++) { + if (!state->cb->attachments[i].blend_enable) + continue; + + dynamic->color_blend_enable |= 0xfu << (i * 4); + } + } + pipeline->dynamic_state.mask = states; } @@ -4360,17 +4366,6 @@ radv_pipeline_emit_blend_state(struct radeon_cmdbuf *ctx_cs, const struct radv_graphics_pipeline *pipeline, const struct radv_blend_state *blend) { - const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; - - radeon_set_context_reg_seq(ctx_cs, R_028780_CB_BLEND0_CONTROL, 8); - radeon_emit_array(ctx_cs, blend->cb_blend_control, 8); - - if (pdevice->rad_info.has_rbplus) { - - radeon_set_context_reg_seq(ctx_cs, 
R_028760_SX_MRT0_BLEND_OPT, 8); - radeon_emit_array(ctx_cs, blend->sx_mrt_blend_opt, 8); - } - radeon_set_context_reg(ctx_cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format); radeon_set_context_reg(ctx_cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask); diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index d3bd53aae92..d7c45993eab 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1121,7 +1121,8 @@ enum radv_dynamic_state_bits { RADV_DYNAMIC_PROVOKING_VERTEX_MODE = 1ull << 39, RADV_DYNAMIC_DEPTH_CLAMP_ENABLE = 1ull << 40, RADV_DYNAMIC_COLOR_WRITE_MASK = 1ull << 41, - RADV_DYNAMIC_ALL = (1ull << 42) - 1, + RADV_DYNAMIC_COLOR_BLEND_ENABLE = 1ull << 42, + RADV_DYNAMIC_ALL = (1ull << 43) - 1, }; enum radv_cmd_dirty_bits { @@ -1169,13 +1170,14 @@ enum radv_cmd_dirty_bits { RADV_CMD_DIRTY_DYNAMIC_PROVOKING_VERTEX_MODE = 1ull << 39, RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLAMP_ENABLE = 1ull << 40, RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK = 1ull << 41, - RADV_CMD_DIRTY_DYNAMIC_ALL = (1ull << 42) - 1, - RADV_CMD_DIRTY_PIPELINE = 1ull << 42, - RADV_CMD_DIRTY_INDEX_BUFFER = 1ull << 43, - RADV_CMD_DIRTY_FRAMEBUFFER = 1ull << 44, - RADV_CMD_DIRTY_VERTEX_BUFFER = 1ull << 45, - RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 46, - RADV_CMD_DIRTY_GUARDBAND = 1ull << 47, + RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE = 1ull << 42, + RADV_CMD_DIRTY_DYNAMIC_ALL = (1ull << 43) - 1, + RADV_CMD_DIRTY_PIPELINE = 1ull << 43, + RADV_CMD_DIRTY_INDEX_BUFFER = 1ull << 44, + RADV_CMD_DIRTY_FRAMEBUFFER = 1ull << 45, + RADV_CMD_DIRTY_VERTEX_BUFFER = 1ull << 46, + RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 47, + RADV_CMD_DIRTY_GUARDBAND = 1ull << 48, }; enum radv_cmd_flush_bits { @@ -1397,6 +1399,8 @@ struct radv_dynamic_state { bool depth_clamp_enable; uint32_t color_write_mask; + + uint32_t color_blend_enable; }; extern const struct radv_dynamic_state default_dynamic_state; @@ -2075,6 +2079,8 @@ struct radv_graphics_pipeline { uint8_t 
vtx_emit_num; uint64_t needed_dynamic_state; unsigned cb_color_control; + unsigned cb_blend_control[MAX_RTS]; + unsigned sx_mrt_blend_opt[MAX_RTS]; uint32_t binding_stride[MAX_VBS]; uint8_t attrib_bindings[MAX_VERTEX_ATTRIBS]; uint32_t attrib_ends[MAX_VERTEX_ATTRIBS];