diff --git a/src/intel/genxml/meson.build b/src/intel/genxml/meson.build index 0932e2c0179..7e131c319a7 100644 --- a/src/intel/genxml/meson.build +++ b/src/intel/genxml/meson.build @@ -70,6 +70,7 @@ genX_bits_included_symbols = [ 'RENDER_SURFACE_STATE::Alpha Clear Color', 'CLEAR_COLOR', 'VERTEX_BUFFER_STATE::Buffer Starting Address', + 'CPS_STATE', ] genX_bits_h = custom_target( diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index d197f3e9dd0..9b548a5c487 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -208,8 +208,10 @@ anv_dynamic_state_copy(struct anv_dynamic_state *dest, ANV_CMP_COPY(color_writes, ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE); - ANV_CMP_COPY(fragment_shading_rate.width, ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE); - ANV_CMP_COPY(fragment_shading_rate.height, ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE); + ANV_CMP_COPY(fragment_shading_rate.rate.width, ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE); + ANV_CMP_COPY(fragment_shading_rate.rate.height, ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE); + ANV_CMP_COPY(fragment_shading_rate.ops[0], ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE); + ANV_CMP_COPY(fragment_shading_rate.ops[1], ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE); #undef ANV_CMP_COPY @@ -1338,6 +1340,25 @@ void anv_TrimCommandPool( /* Nothing for us to do here. Our pools stay pretty tidy. */ } +/** + * Return NULL if the current subpass has no color attachment. + */ +const struct anv_image_view * +anv_cmd_buffer_get_first_color_view(const struct anv_cmd_buffer *cmd_buffer) +{ + const struct anv_subpass *subpass = cmd_buffer->state.subpass; + + if (subpass->color_count == 0) + return NULL; + + const struct anv_image_view *iview = + cmd_buffer->state.attachments[subpass->color_attachments[0].attachment].image_view; + + assert(iview->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT); + + return iview; +} + /** * Return NULL if the current subpass has no depthstencil attachment. 
*/ @@ -1358,6 +1379,25 @@ anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer) return iview; } +/** + * Return NULL if the current subpass has no fragment shading rate attachment. + */ +const struct anv_image_view * +anv_cmd_buffer_get_fsr_view(const struct anv_cmd_buffer *cmd_buffer) +{ + const struct anv_subpass *subpass = cmd_buffer->state.subpass; + + if (subpass->fsr_attachment == NULL) + return NULL; + + const struct anv_image_view *iview = + cmd_buffer->state.attachments[subpass->fsr_attachment->attachment].image_view; + + assert(iview->image->vk.usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR); + + return iview; +} + static struct anv_descriptor_set * anv_cmd_buffer_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point, @@ -1610,8 +1650,15 @@ void anv_CmdSetFragmentShadingRateKHR( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - cmd_buffer->state.gfx.dynamic.fragment_shading_rate = *pFragmentSize; - cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE; + if (cmd_buffer->state.gfx.dynamic.fragment_shading_rate.rate.width != pFragmentSize->width || + cmd_buffer->state.gfx.dynamic.fragment_shading_rate.rate.height != pFragmentSize->height || + cmd_buffer->state.gfx.dynamic.fragment_shading_rate.ops[0] != combinerOps[0] || + cmd_buffer->state.gfx.dynamic.fragment_shading_rate.ops[1] != combinerOps[1]) { + cmd_buffer->state.gfx.dynamic.fragment_shading_rate.rate = *pFragmentSize; + memcpy(cmd_buffer->state.gfx.dynamic.fragment_shading_rate.ops, combinerOps, + sizeof(cmd_buffer->state.gfx.dynamic.fragment_shading_rate.ops)); + cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE; + } } static inline uint32_t diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 1a325ea63c2..cfe1d21ed97 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -60,6 +60,7 @@ #include "perf/intel_perf.h" #include 
"genxml/gen7_pack.h" +#include "genxml/genX_bits.h" static const driOptionDescription anv_dri_options[] = { DRI_CONF_SECTION_PERFORMANCE @@ -1554,7 +1555,10 @@ void anv_GetPhysicalDeviceFeatures2( (VkPhysicalDeviceFragmentShadingRateFeaturesKHR *)ext; features->attachmentFragmentShadingRate = false; features->pipelineFragmentShadingRate = true; - features->primitiveFragmentShadingRate = false; + features->primitiveFragmentShadingRate = + pdevice->info.has_coarse_pixel_primitive_and_cb; + features->attachmentFragmentShadingRate = + pdevice->info.has_coarse_pixel_primitive_and_cb; break; } @@ -2288,27 +2292,48 @@ void anv_GetPhysicalDeviceProperties2( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR: { VkPhysicalDeviceFragmentShadingRatePropertiesKHR *props = (VkPhysicalDeviceFragmentShadingRatePropertiesKHR *)ext; - /* Those must be 0 if attachmentFragmentShadingRate is not - * supported. - */ - props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 }; - props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 }; - props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0; - - props->primitiveFragmentShadingRateWithMultipleViewports = false; - props->layeredShadingRateAttachments = false; - props->fragmentShadingRateNonTrivialCombinerOps = false; + props->primitiveFragmentShadingRateWithMultipleViewports = + pdevice->info.has_coarse_pixel_primitive_and_cb; + props->layeredShadingRateAttachments = pdevice->info.has_coarse_pixel_primitive_and_cb; + props->fragmentShadingRateNonTrivialCombinerOps = + pdevice->info.has_coarse_pixel_primitive_and_cb; props->maxFragmentSize = (VkExtent2D) { 4, 4 }; - props->maxFragmentSizeAspectRatio = 4; - props->maxFragmentShadingRateCoverageSamples = 4 * 4 * 16; - props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_16_BIT; + props->maxFragmentSizeAspectRatio = + pdevice->info.has_coarse_pixel_primitive_and_cb ? 
+ 2 : 4; + props->maxFragmentShadingRateCoverageSamples = 4 * 4 * + (pdevice->info.has_coarse_pixel_primitive_and_cb ? 4 : 16); + props->maxFragmentShadingRateRasterizationSamples = + pdevice->info.has_coarse_pixel_primitive_and_cb ? + VK_SAMPLE_COUNT_4_BIT : VK_SAMPLE_COUNT_16_BIT; props->fragmentShadingRateWithShaderDepthStencilWrites = false; props->fragmentShadingRateWithSampleMask = true; props->fragmentShadingRateWithShaderSampleMask = false; props->fragmentShadingRateWithConservativeRasterization = true; props->fragmentShadingRateWithFragmentShaderInterlock = true; props->fragmentShadingRateWithCustomSampleLocations = true; - props->fragmentShadingRateStrictMultiplyCombiner = false; + + /* Fix in DG2_G10_C0 and DG2_G11_B0. Consider any other Sku as having + * the fix. + */ + props->fragmentShadingRateStrictMultiplyCombiner = + pdevice->info.platform == INTEL_PLATFORM_DG2_G10 ? + pdevice->info.revision >= 8 : + pdevice->info.platform == INTEL_PLATFORM_DG2_G11 ? + pdevice->info.revision >= 4 : true; + + if (pdevice->info.has_coarse_pixel_primitive_and_cb) { + props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 }; + props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 }; + props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1; + } else { + /* Those must be 0 if attachmentFragmentShadingRate is not + * supported. 
+ */ + props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 }; + props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 }; + props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0; + } break; } @@ -3233,6 +3258,28 @@ VkResult anv_CreateDevice( if (result != VK_SUCCESS) goto fail_workaround_bo; + if (device->info.ver >= 12 && + device->vk.enabled_extensions.KHR_fragment_shading_rate) { + uint32_t n_cps_states = 3 * 3; /* All combinations of X by Y CP sizes (1, 2, 4) */ + + if (device->info.has_coarse_pixel_primitive_and_cb) + n_cps_states *= 5 * 5; /* 5 ops for each of the 2 combiners */ + + n_cps_states += 1; /* Disable CPS */ + + /* Each of the combinations must be replicated for all viewports */ + n_cps_states *= MAX_VIEWPORTS; + + device->cps_states = + anv_state_pool_alloc(&device->dynamic_state_pool, + n_cps_states * CPS_STATE_length(&device->info) * 4, + 32); + if (device->cps_states.map == NULL) + goto fail_trivial_batch; + + anv_genX(&device->info, init_cps_device_state)(device); + } + /* Allocate a null surface state at surface state offset 0. This makes * NULL descriptor handling trivial because we can just memset structures * to zero and they have a valid descriptor. 
@@ -3277,6 +3324,7 @@ VkResult anv_CreateDevice( anv_pipeline_cache_finish(&device->default_pipeline_cache); fail_trivial_batch_bo_and_scratch_pool: anv_scratch_pool_finish(device, &device->scratch_pool); + fail_trivial_batch: anv_device_release_bo(device, device->trivial_batch_bo); fail_workaround_bo: anv_device_release_bo(device, device->workaround_bo); @@ -3352,6 +3400,7 @@ void anv_DestroyDevice( anv_state_reserved_pool_finish(&device->custom_border_colors); anv_state_pool_free(&device->dynamic_state_pool, device->border_colors); anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash); + anv_state_pool_free(&device->dynamic_state_pool, device->cps_states); #endif for (unsigned i = 0; i < ARRAY_SIZE(device->rt_scratch_bos); i++) { @@ -4651,14 +4700,45 @@ VkResult anv_GetPhysicalDeviceFragmentShadingRatesKHR( VkSampleCountFlags sample_counts = isl_device_get_sample_counts(&physical_device->isl_dev); + /* BSpec 47003: There are a number of restrictions on the sample count + * based off the coarse pixel size. 
+ */ + static const VkSampleCountFlags cp_size_sample_limits[] = { + [1] = ISL_SAMPLE_COUNT_16_BIT | ISL_SAMPLE_COUNT_8_BIT | + ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT, + [2] = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT, + [4] = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT, + [8] = ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT, + [16] = ISL_SAMPLE_COUNT_1_BIT, + }; + for (uint32_t x = 4; x >= 1; x /= 2) { for (uint32_t y = 4; y >= 1; y /= 2) { - /* For size {1, 1}, the sample count must be ~0 */ - if (x == 1 && y == 1) - append_rate(~0, x, y); - else - append_rate(sample_counts, x, y); - } + if (physical_device->info.has_coarse_pixel_primitive_and_cb) { + /* BSpec 47003: + * "CPsize 1x4 and 4x1 are not supported" + */ + if ((x == 1 && y == 4) || (x == 4 && y == 1)) + continue; + + /* For size {1, 1}, the sample count must be ~0 + * + * 4x2 is also a special case. + */ + if (x == 1 && y == 1) + append_rate(~0, x, y); + else if (x == 4 && y == 2) + append_rate(ISL_SAMPLE_COUNT_1_BIT, x, y); + else + append_rate(cp_size_sample_limits[x * y], x, y); + } else { + /* For size {1, 1}, the sample count must be ~0 */ + if (x == 1 && y == 1) + append_rate(~0, x, y); + else + append_rate(sample_counts, x, y); + } + } } #undef append_rate diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c index 8e264085b1c..5343433bd00 100644 --- a/src/intel/vulkan/anv_formats.c +++ b/src/intel/vulkan/anv_formats.c @@ -807,6 +807,11 @@ anv_get_image_format_features2(const struct intel_device_info *devinfo, } } + if (devinfo->has_coarse_pixel_primitive_and_cb && + vk_format == VK_FORMAT_R8_UINT && + vk_tiling == VK_IMAGE_TILING_OPTIMAL) + flags |= VK_FORMAT_FEATURE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR; + return flags; } diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index 88b45a889ee..3a7d87cae08 100644 --- a/src/intel/vulkan/anv_genX.h +++ 
b/src/intel/vulkan/anv_genX.h @@ -52,6 +52,8 @@ void genX(init_physical_device_state)(struct anv_physical_device *device); VkResult genX(init_device_state)(struct anv_device *device); +void genX(init_cps_device_state)(struct anv_device *device); + void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer); void genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer); @@ -132,7 +134,6 @@ void genX(emit_sample_pattern)(struct anv_batch *batch, uint32_t samples, void genX(emit_shading_rate)(struct anv_batch *batch, const struct anv_graphics_pipeline *pipeline, - struct anv_state cps_states, struct anv_dynamic_state *dynamic_state); void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer, diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index ef8a7171c34..755adf2f2dd 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -216,6 +216,9 @@ choose_isl_surf_usage(VkImageCreateFlags vk_create_flags, if (vk_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) isl_usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT; + if (vk_usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR) + isl_usage |= ISL_SURF_USAGE_CPB_BIT; + if (vk_create_flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) isl_usage |= ISL_SURF_USAGE_CUBE_BIT; diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c index d823760f99a..a4765479c60 100644 --- a/src/intel/vulkan/anv_pass.c +++ b/src/intel/vulkan/anv_pass.c @@ -247,12 +247,16 @@ num_subpass_attachments2(const VkSubpassDescription2KHR *desc) const VkSubpassDescriptionDepthStencilResolveKHR *ds_resolve = vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR); + const VkFragmentShadingRateAttachmentInfoKHR *fsr_attachment = + vk_find_struct_const(desc->pNext, + FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR); return desc->inputAttachmentCount + desc->colorAttachmentCount + (desc->pResolveAttachments ? 
desc->colorAttachmentCount : 0) + (desc->pDepthStencilAttachment != NULL) + - (ds_resolve && ds_resolve->pDepthStencilResolveAttachment); + (ds_resolve && ds_resolve->pDepthStencilResolveAttachment) + + (fsr_attachment != NULL && fsr_attachment->pFragmentShadingRateAttachment); } VkResult anv_CreateRenderPass2( @@ -391,6 +395,22 @@ VkResult anv_CreateRenderPass2( subpass->depth_resolve_mode = ds_resolve->depthResolveMode; subpass->stencil_resolve_mode = ds_resolve->stencilResolveMode; } + + const VkFragmentShadingRateAttachmentInfoKHR *fsr_attachment = + vk_find_struct_const(desc->pNext, + FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR); + + if (fsr_attachment && fsr_attachment->pFragmentShadingRateAttachment) { + subpass->fsr_attachment = subpass_attachments++; + + *subpass->fsr_attachment = (struct anv_subpass_attachment) { + .usage = VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR, + .attachment = fsr_attachment->pFragmentShadingRateAttachment->attachment, + .layout = fsr_attachment->pFragmentShadingRateAttachment->layout, + }; + subpass->fsr_extent = fsr_attachment->shadingRateAttachmentTexelSize; + } + } for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) { @@ -498,8 +518,8 @@ anv_dynamic_pass_init_full(struct anv_dynamic_render_pass *dyn_render_pass, const VkRenderingInfoKHR *info) { uint32_t att_count; - uint32_t color_count = 0, ds_count = 0; - uint32_t ds_idx; + uint32_t color_count = 0, ds_count = 0, fsr_count = 0; + uint32_t ds_idx, fsr_idx; bool has_color_resolve, has_ds_resolve; struct anv_render_pass *pass = &dyn_render_pass->pass; @@ -514,6 +534,9 @@ anv_dynamic_pass_init_full(struct anv_dynamic_render_pass *dyn_render_pass, dyn_render_pass->suspending = info->flags & VK_RENDERING_SUSPENDING_BIT_KHR; dyn_render_pass->resuming = info->flags & VK_RENDERING_RESUMING_BIT_KHR; + /* Get the total attachment count by counting color, depth & fragment + * shading rate views. 
+ */ color_count = info->colorAttachmentCount; if ((info->pDepthAttachment && info->pDepthAttachment->imageView) || (info->pStencilAttachment && info->pStencilAttachment->imageView)) @@ -538,8 +561,15 @@ anv_dynamic_pass_init_full(struct anv_dynamic_render_pass *dyn_render_pass, if (has_ds_resolve) ds_count *= 2; - att_count = color_count + ds_count; + const VkRenderingFragmentShadingRateAttachmentInfoKHR *fsr_attachment = + vk_find_struct_const(info->pNext, + RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR); + if (fsr_attachment && fsr_attachment->imageView != VK_NULL_HANDLE) + fsr_count = 1; + + att_count = color_count + ds_count + fsr_count; ds_idx = color_count; + fsr_idx = color_count + ds_count; /* Setup pass & subpass */ *pass = (struct anv_render_pass) { @@ -605,7 +635,7 @@ anv_dynamic_pass_init_full(struct anv_dynamic_render_pass *dyn_render_pass, ANV_FROM_HANDLE(anv_image_view, iview, d_or_s_att->imageView); - pass->attachments[ds_idx] = (struct anv_render_pass_attachment) { + pass->attachments[ds_idx] = (struct anv_render_pass_attachment) { .format = iview->vk.format, .samples = iview->vk.image->samples, .usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, @@ -635,4 +665,20 @@ anv_dynamic_pass_init_full(struct anv_dynamic_render_pass *dyn_render_pass, subpass->stencil_resolve_mode = stencil_resolve_mode; } } + + if (fsr_count) { + ANV_FROM_HANDLE(anv_image_view, iview, fsr_attachment->imageView); + + pass->attachments[fsr_idx] = (struct anv_render_pass_attachment) { + .format = iview->vk.format, + .samples = iview->vk.image->samples, + .usage = VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR, + }; + + *subpass->fsr_attachment = (struct anv_subpass_attachment) { + .usage = VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR, + .attachment = fsr_idx, + }; + subpass->fsr_extent = fsr_attachment->shadingRateAttachmentTexelSize; + } } diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 218981df818..cf6b06c6f5c 
100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -324,8 +324,6 @@ void anv_DestroyPipeline( if (gfx_pipeline->blend_state.map) anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->blend_state); - if (gfx_pipeline->cps_state.map) - anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->cps_state); for (unsigned s = 0; s < ARRAY_SIZE(gfx_pipeline->shaders); s++) { if (gfx_pipeline->shaders[s]) @@ -2345,8 +2343,11 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline, vk_find_struct_const(pCreateInfo->pNext, PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR); if (fsr_state) { - if (states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) - dynamic->fragment_shading_rate = fsr_state->fragmentSize; + if (states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) { + dynamic->fragment_shading_rate.rate = fsr_state->fragmentSize; + memcpy(dynamic->fragment_shading_rate.ops, fsr_state->combinerOps, + sizeof(dynamic->fragment_shading_rate.ops)); + } } pipeline->dynamic_state_mask = states; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 82e814fd3b4..e5247634d1d 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1194,6 +1194,16 @@ struct anv_device { struct anv_state slice_hash; + /** An array of CPS_STATE structures grouped by MAX_VIEWPORTS elements + * + * We need to emit CPS_STATE structures for each viewport accessible by a + * pipeline. So rather than write many identical CPS_STATE structures + * dynamically, we can enumerate all possible combinations and then just + * emit a 3DSTATE_CPS_POINTERS instruction with the right offset into this + * array. 
+ */ + struct anv_state cps_states; + uint32_t queue_count; struct anv_queue * queues; @@ -2696,7 +2706,10 @@ struct anv_dynamic_state { VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS]; } sample_locations; - VkExtent2D fragment_shading_rate; + struct { + VkExtent2D rate; + VkFragmentShadingRateCombinerOpKHR ops[2]; + } fragment_shading_rate; VkCullModeFlags cull_mode; VkFrontFace front_face; @@ -2951,6 +2964,9 @@ struct anv_subpass { VkResolveModeFlagBitsKHR depth_resolve_mode; VkResolveModeFlagBitsKHR stencil_resolve_mode; + struct anv_subpass_attachment * fsr_attachment; + VkExtent2D fsr_extent; + uint32_t view_mask; /** Subpass has a depth/stencil self-dependency */ @@ -2994,8 +3010,9 @@ struct anv_render_pass { /* RTs * 2 (for resolve attachments) * depth/sencil * 2 + * fragment shading rate * 1 */ -#define MAX_DYN_RENDER_ATTACHMENTS (MAX_RTS * 2 + 2 * 2) +#define MAX_DYN_RENDER_ATTACHMENTS (MAX_RTS * 2 + 2 * 2 + 1) /* And this, kids, is what we call a nasty hack. */ struct anv_dynamic_render_pass { @@ -3271,9 +3288,15 @@ anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer); struct anv_state anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer); +const struct anv_image_view * +anv_cmd_buffer_get_first_color_view(const struct anv_cmd_buffer *cmd_buffer); + const struct anv_image_view * anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer); +const struct anv_image_view * +anv_cmd_buffer_get_fsr_view(const struct anv_cmd_buffer *cmd_buffer); + VkResult anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer, uint32_t num_entries, @@ -3499,8 +3522,6 @@ struct anv_graphics_pipeline { struct anv_state blend_state; - struct anv_state cps_state; - uint32_t vb_used; struct anv_pipeline_vertex_binding { uint32_t stride; diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 5b454d9ecc9..f929bc084e1 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ 
b/src/intel/vulkan/genX_cmd_buffer.c @@ -6011,6 +6011,42 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.hiz_enabled = isl_aux_usage_has_hiz(info.hiz_usage); } +static void +cmd_buffer_emit_cps_control_buffer(struct anv_cmd_buffer *cmd_buffer) +{ +#if GFX_VERx10 >= 125 + struct anv_device *device = cmd_buffer->device; + + if (!device->vk.enabled_extensions.KHR_fragment_shading_rate) + return; + + uint32_t *dw = anv_batch_emit_dwords(&cmd_buffer->batch, + device->isl_dev.cpb.size / 4); + if (dw == NULL) + return; + + struct isl_cpb_emit_info info = { }; + + const struct anv_image_view *fsr_iview = + anv_cmd_buffer_get_fsr_view(cmd_buffer); + if (fsr_iview) { + info.view = &fsr_iview->planes[0].isl; + info.surf = &fsr_iview->image->planes[0].primary_surface.isl; + info.address = + anv_batch_emit_reloc(&cmd_buffer->batch, + dw + device->isl_dev.cpb.offset / 4, + fsr_iview->image->bindings[0].address.bo, + fsr_iview->image->bindings[0].address.offset + + fsr_iview->image->bindings[0].memory_range.offset); + info.mocs = + anv_mocs(device, fsr_iview->image->bindings[0].address.bo, + ISL_SURF_USAGE_CPB_BIT); + } + + isl_emit_cpb_control_s(&device->isl_dev, dw, &info); +#endif /* GFX_VERx10 >= 125 */ +} + /** * This ANDs the view mask of the current subpass with the pending clear * views in the attachment to get the mask of views active in the subpass @@ -6269,12 +6305,13 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, continue; assert(a < cmd_state->pass->attachment_count); + struct anv_subpass_attachment *att = &subpass->attachments[i]; struct anv_attachment_state *att_state = &cmd_state->attachments[a]; - struct anv_image_view *iview = cmd_state->attachments[a].image_view; + struct anv_image_view *iview = att_state->image_view; const struct anv_image *image = iview->image; - VkImageLayout target_layout = subpass->attachments[i].layout; + VkImageLayout target_layout = att->layout; VkImageLayout target_stencil_layout = 
subpass->attachments[i].stencil_layout; @@ -6295,6 +6332,22 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, layer_count = fb->layers; } + /* Treat the fragment shading rate attachment as color. But make sure we + * don't use fb->layers if the fragment shading rate attachment only has + * one layer. + * + * Vulkan spec 1.2.170 - VkFramebufferCreateInfo : + * + * "each element of pAttachments that is used as a fragment shading + * rate attachment by renderPass must have a layerCount that is + * either 1, or greater than layers" + */ + if ((att->usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR) && + iview->planes[0].isl.array_len == 1) { + base_layer = 0; + layer_count = 1; + } + if (image->vk.aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) { bool will_full_fast_clear = (att_state->pending_clear_aspects & VK_IMAGE_ASPECT_COLOR_BIT) && @@ -6464,6 +6517,8 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, #endif cmd_buffer_emit_depth_stencil(cmd_buffer); + + cmd_buffer_emit_cps_control_buffer(cmd_buffer); } static enum blorp_filter @@ -6820,8 +6875,9 @@ cmd_buffer_do_layout_transitions(struct anv_cmd_buffer *cmd_buffer, continue; assert(a < cmd_state->pass->attachment_count); + struct anv_subpass_attachment *att = &subpass->attachments[i]; struct anv_attachment_state *att_state = &cmd_state->attachments[a]; - struct anv_image_view *iview = cmd_state->attachments[a].image_view; + struct anv_image_view *iview = att_state->image_view; const struct anv_image *image = iview->image; /* Transition the image into the final layout for this render pass */ @@ -6840,6 +6896,22 @@ cmd_buffer_do_layout_transitions(struct anv_cmd_buffer *cmd_buffer, layer_count = fb->layers; } + /* Treat the fragment shading rate attachment as color. But make sure we + * don't use fb->layers if the fragment shading rate attachment only has + * one layer. 
+ * + * Vulkan spec 1.2.170 - VkFramebufferCreateInfo : + * + * "each element of pAttachments that is used as a fragment shading + * rate attachment by renderPass must have a layerCount that is + * either 1, or greater than layers" + */ + if (att->usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR && + iview->planes[0].isl.array_len == 1) { + base_layer = 0; + layer_count = 1; + } + if (image->vk.aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) { assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT); transition_color_buffer(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT, @@ -7141,6 +7213,18 @@ genX(cmd_buffer_setup_attachments_dynrender)(struct anv_cmd_buffer *cmd_buffer, } } + if (subpass->fsr_attachment) { + const VkRenderingFragmentShadingRateAttachmentInfoKHR *fsr_att_info = + vk_find_struct_const(info->pNext, + RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR); + assert(fsr_att_info); + + struct anv_attachment_state *fsr_att_state = + &state->attachments[subpass->fsr_attachment->attachment]; + fsr_att_state->image_view = + anv_image_view_from_handle(fsr_att_info->imageView); + } + return VK_SUCCESS; } @@ -7251,6 +7335,8 @@ cmd_buffer_begin_rendering(struct anv_cmd_buffer *cmd_buffer, #endif cmd_buffer_emit_depth_stencil(cmd_buffer); + + cmd_buffer_emit_cps_control_buffer(cmd_buffer); } static void diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index a80435fe3f5..8640d9a1ac2 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -847,22 +847,16 @@ emit_ms_state(struct anv_graphics_pipeline *pipeline, anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SAMPLE_MASK), sm) { sm.SampleMask = sample_mask; } +} - pipeline->cps_state = ANV_STATE_NULL; +static void +emit_3dstate_cps(struct anv_graphics_pipeline *pipeline, uint32_t dynamic_states) +{ #if GFX_VER >= 11 if (!(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) && 
pipeline->base.device->vk.enabled_extensions.KHR_fragment_shading_rate) { -#if GFX_VER >= 12 - struct anv_device *device = pipeline->base.device; - const uint32_t num_dwords = - GENX(CPS_STATE_length) * 4 * pipeline->dynamic_state.viewport.count; - pipeline->cps_state = - anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords, 32); -#endif - genX(emit_shading_rate)(&pipeline->base.batch, pipeline, - pipeline->cps_state, &pipeline->dynamic_state); } #endif @@ -2407,6 +2401,12 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline, ps.PixelShaderRequiresSourceDepthandorWPlaneCoefficients = wm_prog_data->uses_depth_w_coefficients; ps.PixelShaderIsPerCoarsePixel = wm_prog_data->per_coarse_pixel_dispatch; +#endif +#if GFX_VERx10 >= 125 + /* TODO: We should only require this when the last geometry shader uses + * a fragment shading rate that is not constant. + */ + ps.EnablePSDependencyOnCPsizeChange = wm_prog_data->per_coarse_pixel_dispatch; #endif } } @@ -2612,6 +2612,8 @@ genX(graphics_pipeline_create)( emit_3dstate_vf_statistics(pipeline); + emit_3dstate_cps(pipeline, dynamic_states); + emit_3dstate_streamout(pipeline, pCreateInfo->pRasterizationState, dynamic_states); } diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c index 104d0a8efd8..a91c07418f6 100644 --- a/src/intel/vulkan/genX_state.c +++ b/src/intel/vulkan/genX_state.c @@ -358,6 +358,111 @@ genX(init_device_state)(struct anv_device *device) return res; } +#if GFX_VERx10 >= 125 +#define maybe_for_each_shading_rate_op(name) \ + for (VkFragmentShadingRateCombinerOpKHR name = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR; \ + name <= VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MUL_KHR; \ + name++) +#elif GFX_VER >= 12 +#define maybe_for_each_shading_rate_op(name) +#endif + +/* Rather than re-emitting the CPS_STATE structure every time the state changes and + * for as many viewports as needed, we can just prepare all possible cases and + * just pick the right offset from the prepacked 
states when needed. + */ +void +genX(init_cps_device_state)(struct anv_device *device) +{ +#if GFX_VER >= 12 + void *cps_state_ptr = device->cps_states.map; + + /* Disabled CPS mode */ + for (uint32_t __v = 0; __v < MAX_VIEWPORTS; __v++) { + struct GENX(CPS_STATE) cps_state = { + .CoarsePixelShadingMode = CPS_MODE_CONSTANT, + .MinCPSizeX = 1, + .MinCPSizeY = 1, +#if GFX_VERx10 >= 125 + .Combiner0OpcodeforCPsize = PASSTHROUGH, + .Combiner1OpcodeforCPsize = PASSTHROUGH, +#endif /* GFX_VERx10 >= 125 */ + + }; + + GENX(CPS_STATE_pack)(NULL, cps_state_ptr, &cps_state); + cps_state_ptr += GENX(CPS_STATE_length) * 4; + } + + maybe_for_each_shading_rate_op(op0) { + maybe_for_each_shading_rate_op(op1) { + for (uint32_t x = 1; x <= 4; x *= 2) { + for (uint32_t y = 1; y <= 4; y *= 2) { + struct GENX(CPS_STATE) cps_state = { + .CoarsePixelShadingMode = CPS_MODE_CONSTANT, + .MinCPSizeX = x, + .MinCPSizeY = y, + }; + +#if GFX_VERx10 >= 125 + static const uint32_t combiner_ops[] = { + [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR] = PASSTHROUGH, + [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR] = OVERRIDE, + [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MIN_KHR] = HIGH_QUALITY, + [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_KHR] = LOW_QUALITY, + [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MUL_KHR] = RELATIVE, + }; + + cps_state.Combiner0OpcodeforCPsize = combiner_ops[op0]; + cps_state.Combiner1OpcodeforCPsize = combiner_ops[op1]; +#endif /* GFX_VERx10 >= 125 */ + + for (uint32_t __v = 0; __v < MAX_VIEWPORTS; __v++) { + GENX(CPS_STATE_pack)(NULL, cps_state_ptr, &cps_state); + cps_state_ptr += GENX(CPS_STATE_length) * 4; + } + } + } + } + } +#endif /* GFX_VER >= 12 */ +} + +#if GFX_VER >= 12 +static uint32_t +get_cps_state_offset(struct anv_device *device, bool cps_enabled, + const struct anv_dynamic_state *d) +{ + if (!cps_enabled) + return device->cps_states.offset; + + uint32_t offset; + static const uint32_t size_index[] = { + [1] = 0, + [2] = 1, + [4] = 2, + }; + +#if GFX_VERx10 >= 125 + 
offset = + 1 + /* skip disabled */ + d->fragment_shading_rate.ops[0] * 5 * 3 * 3 + + d->fragment_shading_rate.ops[1] * 3 * 3 + + size_index[d->fragment_shading_rate.rate.width] * 3 + + size_index[d->fragment_shading_rate.rate.height]; +#else + offset = + 1 + /* skip disabled */ + size_index[d->fragment_shading_rate.rate.width] * 3 + + size_index[d->fragment_shading_rate.rate.height]; +#endif + + offset *= MAX_VIEWPORTS * GENX(CPS_STATE_length) * 4; + + return device->cps_states.offset + offset; +} +#endif /* GFX_VER >= 12 */ + void genX(emit_l3_config)(struct anv_batch *batch, const struct anv_device *device, @@ -602,7 +707,6 @@ genX(emit_sample_pattern)(struct anv_batch *batch, uint32_t samples, void genX(emit_shading_rate)(struct anv_batch *batch, const struct anv_graphics_pipeline *pipeline, - struct anv_state cps_states, struct anv_dynamic_state *dynamic_state) { const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); @@ -612,28 +716,34 @@ genX(emit_shading_rate)(struct anv_batch *batch, anv_batch_emit(batch, GENX(3DSTATE_CPS), cps) { cps.CoarsePixelShadingMode = cps_enable ? CPS_MODE_CONSTANT : CPS_MODE_NONE; if (cps_enable) { - cps.MinCPSizeX = dynamic_state->fragment_shading_rate.width; - cps.MinCPSizeY = dynamic_state->fragment_shading_rate.height; + cps.MinCPSizeX = dynamic_state->fragment_shading_rate.rate.width; + cps.MinCPSizeY = dynamic_state->fragment_shading_rate.rate.height; } } -#elif GFX_VER == 12 - for (uint32_t i = 0; i < dynamic_state->viewport.count; i++) { - uint32_t *cps_state_dwords = - cps_states.map + GENX(CPS_STATE_length) * 4 * i; - struct GENX(CPS_STATE) cps_state = { - .CoarsePixelShadingMode = cps_enable ? 
CPS_MODE_CONSTANT : CPS_MODE_NONE, - }; - - if (cps_enable) { - cps_state.MinCPSizeX = dynamic_state->fragment_shading_rate.width; - cps_state.MinCPSizeY = dynamic_state->fragment_shading_rate.height; - } - - GENX(CPS_STATE_pack)(NULL, cps_state_dwords, &cps_state); +#elif GFX_VER >= 12 + /* TODO: we can optimize this flush in the following cases: + * + * In the case where the last geometry shader emits a value that is not + * constant, we can avoid this stall because we can synchronize the + * pixel shader internally with + * 3DSTATE_PS::EnablePSDependencyOnCPsizeChange. + * + * If we know that the previous pipeline and the current one are using + * the same fragment shading rate. + */ + anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) { +#if GFX_VERx10 >= 125 + pc.PSSStallSyncEnable = true; +#else + pc.PSDSyncEnable = true; +#endif } anv_batch_emit(batch, GENX(3DSTATE_CPS_POINTERS), cps) { - cps.CoarsePixelShadingStateArrayPointer = cps_states.offset; + struct anv_device *device = pipeline->base.device; + + cps.CoarsePixelShadingStateArrayPointer = + get_cps_state_offset(device, cps_enable, dynamic_state); } #endif } diff --git a/src/intel/vulkan/gfx8_cmd_buffer.c b/src/intel/vulkan/gfx8_cmd_buffer.c index 94b813c6a78..fa4444971b7 100644 --- a/src/intel/vulkan/gfx8_cmd_buffer.c +++ b/src/intel/vulkan/gfx8_cmd_buffer.c @@ -420,6 +420,13 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; struct anv_dynamic_state *d = &cmd_buffer->state.gfx.dynamic; +#if GFX_VER >= 11 + if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) { + genX(emit_shading_rate)(&cmd_buffer->batch, pipeline, + &cmd_buffer->state.gfx.dynamic); + } +#endif /* GFX_VER >= 11 */ + if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) { uint32_t topology; if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) @@ -803,23 +810,6 @@ 
genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) } } -#if GFX_VER >= 11 - if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) { - struct anv_state cps_states = ANV_STATE_NULL; - -#if GFX_VER >= 12 - uint32_t count = cmd_buffer->state.gfx.dynamic.viewport.count; - cps_states = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - GENX(CPS_STATE_length) * 4 * count, - 32); -#endif /* GFX_VER >= 12 */ - - genX(emit_shading_rate)(&cmd_buffer->batch, pipeline, cps_states, - &cmd_buffer->state.gfx.dynamic); - } -#endif /* GFX_VER >= 11 */ - cmd_buffer->state.gfx.dirty = 0; }