diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index da3c90fdfa5..bb8aef525e7 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -100,6 +100,7 @@ const struct anv_dynamic_state default_dynamic_state = {
    .stencil_test_enable = 0,
    .dyn_vbo_stride = 0,
    .dyn_vbo_size = 0,
+   .color_writes = 0xff,
 };
 
 /**
@@ -196,6 +197,8 @@ anv_dynamic_state_copy(struct anv_dynamic_state *dest,
       changed |= ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
    }
 
+   ANV_CMP_COPY(color_writes, ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE);
+
 #undef ANV_CMP_COPY
 
    return changed;
@@ -1411,3 +1414,25 @@ void anv_CmdSetDeviceMask(
 {
    /* No-op */
 }
+
+void anv_CmdSetColorWriteEnableEXT(
+    VkCommandBuffer                             commandBuffer,
+    uint32_t                                    attachmentCount,
+    const VkBool32*                             pColorWriteEnables)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   /* A count equal to MAX_RTS is valid (one enable per render target); only
+    * a larger count would not fit the per-RT color_writes bitfield.
+    */
+   assert(attachmentCount <= MAX_RTS);
+
+   uint8_t color_writes = 0;
+   for (uint32_t i = 0; i < attachmentCount; i++)
+      color_writes |= pColorWriteEnables[i] ? (1u << i) : 0;
+
+   if (cmd_buffer->state.gfx.dynamic.color_writes != color_writes) {
+      cmd_buffer->state.gfx.dynamic.color_writes = color_writes;
+      cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE;
+   }
+}
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index abf1bae0441..67f30303184 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -256,6 +256,7 @@ get_device_extensions(const struct anv_physical_device *device,
       .EXT_4444_formats                      = true,
       .EXT_buffer_device_address             = device->has_a64_buffer_access,
       .EXT_calibrated_timestamps             = device->has_reg_timestamp,
+      .EXT_color_write_enable                = true,
       .EXT_conditional_rendering             = device->info.ver >= 8 ||
                                                device->info.is_haswell,
       .EXT_conservative_rasterization        = device->info.ver >= 9,
@@ -1376,6 +1377,13 @@ void anv_GetPhysicalDeviceFeatures2(
          break;
       }
 
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT: {
+         VkPhysicalDeviceColorWriteEnableFeaturesEXT *features =
+            (VkPhysicalDeviceColorWriteEnableFeaturesEXT *)ext;
+         features->colorWriteEnable = true;
+         break;
+      }
+
       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
          VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
             (VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 8568d7262b4..4ee864cc684 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -2079,12 +2079,34 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
       }
    }
 
+   if (states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
+      if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
+          uses_color_att) {
+         assert(pCreateInfo->pColorBlendState);
+         const VkPipelineColorWriteCreateInfoEXT *color_write_info =
+            vk_find_struct_const(pCreateInfo->pColorBlendState->pNext,
+                                 PIPELINE_COLOR_WRITE_CREATE_INFO_EXT);
+
+         if (color_write_info) {
+            /* color_writes holds one bit per render target. */
+            assert(color_write_info->attachmentCount <= MAX_RTS);
+            dynamic->color_writes = 0;
+            for (uint32_t i = 0; i < color_write_info->attachmentCount; i++) {
+               dynamic->color_writes |=
+                  color_write_info->pColorWriteEnables[i] ? (1u << i) : 0;
+            }
+         }
+      }
+   }
+
    pipeline->dynamic_state_mask = states;
 
    /* For now that only state that can be either dynamic or baked in the
-    * pipeline is the sample location.
+    * pipeline is the sample location & color blend.
     */
-   pipeline->static_state_mask = states & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
+   pipeline->static_state_mask = states &
+      (ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS |
+       ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE);
 }
 
 static void
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index f059e232351..7c262f4bec5 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -2264,7 +2264,8 @@ typedef uint32_t anv_cmd_dirty_mask_t;
     ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |   \
     ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE |        \
     ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP |                 \
-    ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)
+    ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS |           \
+    ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE)
 
 static inline enum anv_cmd_dirty_bits
 anv_cmd_dirty_bit_for_vk_dynamic_state(VkDynamicState vk_state)
@@ -2314,6 +2315,8 @@ anv_cmd_dirty_bit_for_vk_dynamic_state(VkDynamicState vk_state)
       return ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
    case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
       return ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
+   case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
+      return ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE;
    default:
       assert(!"Unsupported dynamic state");
       return 0;
@@ -2653,6 +2656,9 @@ struct anv_dynamic_state {
    bool                                         stencil_test_enable;
    bool                                         dyn_vbo_stride;
    bool                                         dyn_vbo_size;
+
+   /* Bitfield, one bit per render target */
+   uint8_t                                      color_writes;
 };
 
 extern const struct anv_dynamic_state default_dynamic_state;
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index f9fe6285be0..3762963d8fc 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -1126,6 +1126,19 @@ is_dual_src_blend_factor(VkBlendFactor factor)
           factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA;
 }
 
+static inline uint32_t *
+write_disabled_blend(uint32_t *state)
+{
+   struct GENX(BLEND_STATE_ENTRY) entry = {
+      .WriteDisableAlpha = true,
+      .WriteDisableRed = true,
+      .WriteDisableGreen = true,
+      .WriteDisableBlue = true,
+   };
+   GENX(BLEND_STATE_ENTRY_pack)(NULL, state, &entry);
+   return state + GENX(BLEND_STATE_ENTRY_length);
+}
+
 static void
 emit_cb_state(struct anv_graphics_pipeline *pipeline,
               const VkPipelineColorBlendStateCreateInfo *info,
@@ -1181,15 +1194,12 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
       assert(i < 8);
 
       if (info == NULL || binding->index >= info->attachmentCount) {
-         /* Default everything to disabled */
-         struct GENX(BLEND_STATE_ENTRY) entry = {
-            .WriteDisableAlpha = true,
-            .WriteDisableRed = true,
-            .WriteDisableGreen = true,
-            .WriteDisableBlue = true,
-         };
-         GENX(BLEND_STATE_ENTRY_pack)(NULL, state_pos, &entry);
-         state_pos += GENX(BLEND_STATE_ENTRY_length);
+         state_pos = write_disabled_blend(state_pos);
+         continue;
+      }
+
+      if ((pipeline->dynamic_state.color_writes & (1u << binding->index)) == 0) {
+         state_pos = write_disabled_blend(state_pos);
          continue;
       }
 
@@ -1903,6 +1913,9 @@ has_color_buffer_write_enabled(const struct anv_graphics_pipeline *pipeline,
    if (!shader_bin)
       return false;
 
+   if (!pipeline->dynamic_state.color_writes)
+      return false;
+
    const struct anv_pipeline_bind_map *bind_map = &shader_bin->bind_map;
    for (int i = 0; i < bind_map->surface_count; i++) {
       struct anv_pipeline_binding *binding = &bind_map->surface_to_descriptor[i];
diff --git a/src/intel/vulkan/gfx7_cmd_buffer.c b/src/intel/vulkan/gfx7_cmd_buffer.c
index cd0adf7b647..421967eace1 100644
--- a/src/intel/vulkan/gfx7_cmd_buffer.c
+++ b/src/intel/vulkan/gfx7_cmd_buffer.c
@@ -351,6 +351,72 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
                                cmd_buffer->state.gfx.dynamic.sample_locations.locations);
    }
 
+   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
+      const uint8_t color_writes = cmd_buffer->state.gfx.dynamic.color_writes;
+      /* 3DSTATE_WM in the hope we can avoid spawning fragment shaders
+       * threads.
+       */
+      uint32_t dwords[GENX(3DSTATE_WM_length)];
+      struct GENX(3DSTATE_WM) wm = {
+         GENX(3DSTATE_WM_header),
+
+         .ThreadDispatchEnable = pipeline->force_fragment_thread_dispatch ||
+                                 color_writes,
+      };
+      GENX(3DSTATE_WM_pack)(NULL, dwords, &wm);
+
+      anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx7.wm);
+
+      /* Blend states of each RT */
+      uint32_t surface_count = 0;
+      struct anv_pipeline_bind_map *map = NULL;
+      if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
+         map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map;
+         surface_count = map->surface_count;
+      }
+
+      /* blend_dws only has room for MAX_RTS entries, so never pack more than
+       * that even if the bind map lists more surfaces.
+       */
+      const uint32_t entry_count =
+         surface_count < MAX_RTS ? surface_count : MAX_RTS;
+
+      uint32_t blend_dws[GENX(BLEND_STATE_length) +
+                         MAX_RTS * GENX(BLEND_STATE_ENTRY_length)];
+      uint32_t *dws = blend_dws;
+      memset(blend_dws, 0, sizeof(blend_dws));
+
+      /* Skip this part */
+      dws += GENX(BLEND_STATE_length);
+
+      for (uint32_t i = 0; i < entry_count; i++) {
+         struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];
+         /* color_writes has one bit per RT; an index outside that range has
+          * no enable bit (and a shift of 32 or more would be undefined).
+          */
+         bool write_disabled = binding->index >= MAX_RTS ||
+                               (color_writes & (1u << binding->index)) == 0;
+         struct GENX(BLEND_STATE_ENTRY) entry = {
+            .WriteDisableAlpha = write_disabled,
+            .WriteDisableRed   = write_disabled,
+            .WriteDisableGreen = write_disabled,
+            .WriteDisableBlue  = write_disabled,
+         };
+         GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry);
+         dws += GENX(BLEND_STATE_ENTRY_length);
+      }
+
+      uint32_t num_dwords = GENX(BLEND_STATE_length) +
+         GENX(BLEND_STATE_ENTRY_length) * entry_count;
+
+      struct anv_state blend_states =
+         anv_cmd_buffer_merge_dynamic(cmd_buffer, blend_dws,
+                                      pipeline->gfx7.blend_state, num_dwords, 64);
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
+         bsp.BlendStatePointer = blend_states.offset;
+      }
+   }
+
    cmd_buffer->state.gfx.dirty = 0;
 }
 
diff --git a/src/intel/vulkan/gfx8_cmd_buffer.c b/src/intel/vulkan/gfx8_cmd_buffer.c
index c265563b680..b750dea26c5 100644
--- a/src/intel/vulkan/gfx8_cmd_buffer.c
+++ b/src/intel/vulkan/gfx8_cmd_buffer.c
@@ -654,6 +654,87 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
                                cmd_buffer->state.gfx.dynamic.sample_locations.locations);
    }
 
+   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
+      const uint8_t color_writes = cmd_buffer->state.gfx.dynamic.color_writes;
+      /* 3DSTATE_WM in the hope we can avoid spawning fragment shaders
+       * threads.
+       *
+       * NOTE(review): dispatch is forced on when no color write is enabled,
+       * whereas the gfx7 path enables dispatch when one is; confirm intended.
+       */
+      uint32_t dwords[MAX2(GENX(3DSTATE_WM_length),
+                           GENX(3DSTATE_PS_BLEND_length))];
+      struct GENX(3DSTATE_WM) wm = {
+         GENX(3DSTATE_WM_header),
+
+         .ForceThreadDispatchEnable = (pipeline->force_fragment_thread_dispatch ||
+                                       !color_writes) ? ForceON : 0,
+      };
+      GENX(3DSTATE_WM_pack)(NULL, dwords, &wm);
+
+      anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx8.wm);
+
+      /* 3DSTATE_PS_BLEND to be consistent with the rest of the
+       * BLEND_STATE_ENTRY.
+       */
+      struct GENX(3DSTATE_PS_BLEND) ps_blend = {
+         GENX(3DSTATE_PS_BLEND_header),
+         .HasWriteableRT = color_writes != 0,
+      };
+      GENX(3DSTATE_PS_BLEND_pack)(NULL, dwords, &ps_blend);
+      anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx8.ps_blend);
+
+      /* Blend states of each RT */
+      uint32_t surface_count = 0;
+      struct anv_pipeline_bind_map *map = NULL;
+      if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
+         map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map;
+         surface_count = map->surface_count;
+      }
+
+      /* blend_dws only has room for MAX_RTS entries, so never pack more than
+       * that even if the bind map lists more surfaces.
+       */
+      const uint32_t entry_count =
+         surface_count < MAX_RTS ? surface_count : MAX_RTS;
+
+      uint32_t blend_dws[GENX(BLEND_STATE_length) +
+                         MAX_RTS * GENX(BLEND_STATE_ENTRY_length)];
+      uint32_t *dws = blend_dws;
+      memset(blend_dws, 0, sizeof(blend_dws));
+
+      /* Skip this part */
+      dws += GENX(BLEND_STATE_length);
+
+      for (uint32_t i = 0; i < entry_count; i++) {
+         struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];
+         /* color_writes has one bit per RT; an index outside that range has
+          * no enable bit (and a shift of 32 or more would be undefined).
+          */
+         bool write_disabled = binding->index >= MAX_RTS ||
+                               (color_writes & (1u << binding->index)) == 0;
+         struct GENX(BLEND_STATE_ENTRY) entry = {
+            .WriteDisableAlpha = write_disabled,
+            .WriteDisableRed   = write_disabled,
+            .WriteDisableGreen = write_disabled,
+            .WriteDisableBlue  = write_disabled,
+         };
+         GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry);
+         dws += GENX(BLEND_STATE_ENTRY_length);
+      }
+
+      uint32_t num_dwords = GENX(BLEND_STATE_length) +
+         GENX(BLEND_STATE_ENTRY_length) * entry_count;
+
+      struct anv_state blend_states =
+         anv_cmd_buffer_merge_dynamic(cmd_buffer, blend_dws,
+                                      pipeline->gfx8.blend_state, num_dwords, 64);
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
+         bsp.BlendStatePointer = blend_states.offset;
+         bsp.BlendStatePointerValid = true;
+      }
+   }
+
    cmd_buffer->state.gfx.dirty = 0;
 }
 