anv: Update VK_KHR_fragment_shading_rate for newer HW

Per primitive & attachment shading rate support added.

v2: Rebase on KHR_dynamic_rendering

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13739>
This commit is contained in:
Lionel Landwerlin
2021-02-05 21:16:38 +02:00
committed by Marge Bot
parent fc837e9f8b
commit 665ffd4bf9
13 changed files with 480 additions and 87 deletions

View File

@@ -70,6 +70,7 @@ genX_bits_included_symbols = [
'RENDER_SURFACE_STATE::Alpha Clear Color',
'CLEAR_COLOR',
'VERTEX_BUFFER_STATE::Buffer Starting Address',
'CPS_STATE',
]
genX_bits_h = custom_target(

View File

@@ -208,8 +208,10 @@ anv_dynamic_state_copy(struct anv_dynamic_state *dest,
ANV_CMP_COPY(color_writes, ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE);
ANV_CMP_COPY(fragment_shading_rate.width, ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE);
ANV_CMP_COPY(fragment_shading_rate.height, ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE);
ANV_CMP_COPY(fragment_shading_rate.rate.width, ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE);
ANV_CMP_COPY(fragment_shading_rate.rate.height, ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE);
ANV_CMP_COPY(fragment_shading_rate.ops[0], ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE);
ANV_CMP_COPY(fragment_shading_rate.ops[1], ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE);
#undef ANV_CMP_COPY
@@ -1338,6 +1340,25 @@ void anv_TrimCommandPool(
/* Nothing for us to do here. Our pools stay pretty tidy. */
}
/**
* Return NULL if the current subpass has no color attachment.
*/
const struct anv_image_view *
anv_cmd_buffer_get_first_color_view(const struct anv_cmd_buffer *cmd_buffer)
{
const struct anv_subpass *subpass = cmd_buffer->state.subpass;
if (subpass->color_count == 0)
return NULL;
const struct anv_image_view *iview =
cmd_buffer->state.attachments[subpass->color_attachments[0].attachment].image_view;
assert(iview->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT);
return iview;
}
/**
* Return NULL if the current subpass has no depthstencil attachment.
*/
@@ -1358,6 +1379,25 @@ anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer)
return iview;
}
/**
* Return NULL if the current subpass has no fragment shading rate attachment.
*/
const struct anv_image_view *
anv_cmd_buffer_get_fsr_view(const struct anv_cmd_buffer *cmd_buffer)
{
const struct anv_subpass *subpass = cmd_buffer->state.subpass;
if (subpass->fsr_attachment == NULL)
return NULL;
const struct anv_image_view *iview =
cmd_buffer->state.attachments[subpass->fsr_attachment->attachment].image_view;
assert(iview->image->vk.usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR);
return iview;
}
static struct anv_descriptor_set *
anv_cmd_buffer_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
VkPipelineBindPoint bind_point,
@@ -1610,9 +1650,16 @@ void anv_CmdSetFragmentShadingRateKHR(
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
cmd_buffer->state.gfx.dynamic.fragment_shading_rate = *pFragmentSize;
if (cmd_buffer->state.gfx.dynamic.fragment_shading_rate.rate.width != pFragmentSize->width ||
cmd_buffer->state.gfx.dynamic.fragment_shading_rate.rate.height != pFragmentSize->height ||
cmd_buffer->state.gfx.dynamic.fragment_shading_rate.ops[0] != combinerOps[0] ||
cmd_buffer->state.gfx.dynamic.fragment_shading_rate.ops[1] != combinerOps[1]) {
cmd_buffer->state.gfx.dynamic.fragment_shading_rate.rate = *pFragmentSize;
memcpy(cmd_buffer->state.gfx.dynamic.fragment_shading_rate.ops, combinerOps,
sizeof(cmd_buffer->state.gfx.dynamic.fragment_shading_rate.ops));
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE;
}
}
static inline uint32_t
ilog2_round_up(uint32_t value)

View File

@@ -60,6 +60,7 @@
#include "perf/intel_perf.h"
#include "genxml/gen7_pack.h"
#include "genxml/genX_bits.h"
static const driOptionDescription anv_dri_options[] = {
DRI_CONF_SECTION_PERFORMANCE
@@ -1554,7 +1555,10 @@ void anv_GetPhysicalDeviceFeatures2(
(VkPhysicalDeviceFragmentShadingRateFeaturesKHR *)ext;
features->attachmentFragmentShadingRate = false;
features->pipelineFragmentShadingRate = true;
features->primitiveFragmentShadingRate = false;
features->primitiveFragmentShadingRate =
pdevice->info.has_coarse_pixel_primitive_and_cb;
features->attachmentFragmentShadingRate =
pdevice->info.has_coarse_pixel_primitive_and_cb;
break;
}
@@ -2288,27 +2292,48 @@ void anv_GetPhysicalDeviceProperties2(
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR: {
VkPhysicalDeviceFragmentShadingRatePropertiesKHR *props =
(VkPhysicalDeviceFragmentShadingRatePropertiesKHR *)ext;
/* Those must be 0 if attachmentFragmentShadingRate is not
* supported.
*/
props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
props->primitiveFragmentShadingRateWithMultipleViewports = false;
props->layeredShadingRateAttachments = false;
props->fragmentShadingRateNonTrivialCombinerOps = false;
props->primitiveFragmentShadingRateWithMultipleViewports =
pdevice->info.has_coarse_pixel_primitive_and_cb;
props->layeredShadingRateAttachments = pdevice->info.has_coarse_pixel_primitive_and_cb;
props->fragmentShadingRateNonTrivialCombinerOps =
pdevice->info.has_coarse_pixel_primitive_and_cb;
props->maxFragmentSize = (VkExtent2D) { 4, 4 };
props->maxFragmentSizeAspectRatio = 4;
props->maxFragmentShadingRateCoverageSamples = 4 * 4 * 16;
props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_16_BIT;
props->maxFragmentSizeAspectRatio =
pdevice->info.has_coarse_pixel_primitive_and_cb ?
2 : 4;
props->maxFragmentShadingRateCoverageSamples = 4 * 4 *
(pdevice->info.has_coarse_pixel_primitive_and_cb ? 4 : 16);
props->maxFragmentShadingRateRasterizationSamples =
pdevice->info.has_coarse_pixel_primitive_and_cb ?
VK_SAMPLE_COUNT_4_BIT : VK_SAMPLE_COUNT_16_BIT;
props->fragmentShadingRateWithShaderDepthStencilWrites = false;
props->fragmentShadingRateWithSampleMask = true;
props->fragmentShadingRateWithShaderSampleMask = false;
props->fragmentShadingRateWithConservativeRasterization = true;
props->fragmentShadingRateWithFragmentShaderInterlock = true;
props->fragmentShadingRateWithCustomSampleLocations = true;
props->fragmentShadingRateStrictMultiplyCombiner = false;
/* Fixed in DG2_G10_C0 and DG2_G11_B0. Consider any other Sku as having
* the fix.
*/
props->fragmentShadingRateStrictMultiplyCombiner =
pdevice->info.platform == INTEL_PLATFORM_DG2_G10 ?
pdevice->info.revision >= 8 :
pdevice->info.platform == INTEL_PLATFORM_DG2_G11 ?
pdevice->info.revision >= 4 : true;
if (pdevice->info.has_coarse_pixel_primitive_and_cb) {
props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
} else {
/* Those must be 0 if attachmentFragmentShadingRate is not
* supported.
*/
props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
}
break;
}
@@ -3233,6 +3258,28 @@ VkResult anv_CreateDevice(
if (result != VK_SUCCESS)
goto fail_workaround_bo;
if (device->info.ver >= 12 &&
device->vk.enabled_extensions.KHR_fragment_shading_rate) {
uint32_t n_cps_states = 3 * 3; /* All combinaisons of X by Y CP sizes (1, 2, 4) */
if (device->info.has_coarse_pixel_primitive_and_cb)
n_cps_states *= 5 * 5; /* 5 combiners by 2 operators */
n_cps_states += 1; /* Disable CPS */
/* Each of the combinations must be replicated on all viewports */
n_cps_states *= MAX_VIEWPORTS;
device->cps_states =
anv_state_pool_alloc(&device->dynamic_state_pool,
n_cps_states * CPS_STATE_length(&device->info) * 4,
32);
if (device->cps_states.map == NULL)
goto fail_trivial_batch;
anv_genX(&device->info, init_cps_device_state)(device);
}
/* Allocate a null surface state at surface state offset 0. This makes
* NULL descriptor handling trivial because we can just memset structures
* to zero and they have a valid descriptor.
@@ -3277,6 +3324,7 @@ VkResult anv_CreateDevice(
anv_pipeline_cache_finish(&device->default_pipeline_cache);
fail_trivial_batch_bo_and_scratch_pool:
anv_scratch_pool_finish(device, &device->scratch_pool);
fail_trivial_batch:
anv_device_release_bo(device, device->trivial_batch_bo);
fail_workaround_bo:
anv_device_release_bo(device, device->workaround_bo);
@@ -3352,6 +3400,7 @@ void anv_DestroyDevice(
anv_state_reserved_pool_finish(&device->custom_border_colors);
anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash);
anv_state_pool_free(&device->dynamic_state_pool, device->cps_states);
#endif
for (unsigned i = 0; i < ARRAY_SIZE(device->rt_scratch_bos); i++) {
@@ -4651,8 +4700,38 @@ VkResult anv_GetPhysicalDeviceFragmentShadingRatesKHR(
VkSampleCountFlags sample_counts =
isl_device_get_sample_counts(&physical_device->isl_dev);
/* BSpec 47003: There are a number of restrictions on the sample count
* based off the coarse pixel size.
*/
static const VkSampleCountFlags cp_size_sample_limits[] = {
[1] = ISL_SAMPLE_COUNT_16_BIT | ISL_SAMPLE_COUNT_8_BIT |
ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
[2] = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
[4] = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
[8] = ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
[16] = ISL_SAMPLE_COUNT_1_BIT,
};
for (uint32_t x = 4; x >= 1; x /= 2) {
for (uint32_t y = 4; y >= 1; y /= 2) {
if (physical_device->info.has_coarse_pixel_primitive_and_cb) {
/* BSpec 47003:
* "CPsize 1x4 and 4x1 are not supported"
*/
if ((x == 1 && y == 4) || (x == 4 && y == 1))
continue;
/* For size {1, 1}, the sample count must be ~0
*
* 4x2 is also a special case.
*/
if (x == 1 && y == 1)
append_rate(~0, x, y);
else if (x == 4 && y == 2)
append_rate(ISL_SAMPLE_COUNT_1_BIT, x, y);
else
append_rate(cp_size_sample_limits[x * y], x, y);
} else {
/* For size {1, 1}, the sample count must be ~0 */
if (x == 1 && y == 1)
append_rate(~0, x, y);
@@ -4660,6 +4739,7 @@ VkResult anv_GetPhysicalDeviceFragmentShadingRatesKHR(
append_rate(sample_counts, x, y);
}
}
}
#undef append_rate

View File

@@ -807,6 +807,11 @@ anv_get_image_format_features2(const struct intel_device_info *devinfo,
}
}
if (devinfo->has_coarse_pixel_primitive_and_cb &&
vk_format == VK_FORMAT_R8_UINT &&
vk_tiling == VK_IMAGE_TILING_OPTIMAL)
flags |= VK_FORMAT_FEATURE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
return flags;
}

View File

@@ -52,6 +52,8 @@ void genX(init_physical_device_state)(struct anv_physical_device *device);
VkResult genX(init_device_state)(struct anv_device *device);
void genX(init_cps_device_state)(struct anv_device *device);
void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer);
@@ -132,7 +134,6 @@ void genX(emit_sample_pattern)(struct anv_batch *batch, uint32_t samples,
void genX(emit_shading_rate)(struct anv_batch *batch,
const struct anv_graphics_pipeline *pipeline,
struct anv_state cps_states,
struct anv_dynamic_state *dynamic_state);
void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,

View File

@@ -216,6 +216,9 @@ choose_isl_surf_usage(VkImageCreateFlags vk_create_flags,
if (vk_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)
isl_usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT;
if (vk_usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)
isl_usage |= ISL_SURF_USAGE_CPB_BIT;
if (vk_create_flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)
isl_usage |= ISL_SURF_USAGE_CUBE_BIT;

View File

@@ -247,12 +247,16 @@ num_subpass_attachments2(const VkSubpassDescription2KHR *desc)
const VkSubpassDescriptionDepthStencilResolveKHR *ds_resolve =
vk_find_struct_const(desc->pNext,
SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR);
const VkFragmentShadingRateAttachmentInfoKHR *fsr_attachment =
vk_find_struct_const(desc->pNext,
FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR);
return desc->inputAttachmentCount +
desc->colorAttachmentCount +
(desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
(desc->pDepthStencilAttachment != NULL) +
(ds_resolve && ds_resolve->pDepthStencilResolveAttachment);
(ds_resolve && ds_resolve->pDepthStencilResolveAttachment) +
(fsr_attachment != NULL && fsr_attachment->pFragmentShadingRateAttachment);
}
VkResult anv_CreateRenderPass2(
@@ -391,6 +395,22 @@ VkResult anv_CreateRenderPass2(
subpass->depth_resolve_mode = ds_resolve->depthResolveMode;
subpass->stencil_resolve_mode = ds_resolve->stencilResolveMode;
}
const VkFragmentShadingRateAttachmentInfoKHR *fsr_attachment =
vk_find_struct_const(desc->pNext,
FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR);
if (fsr_attachment && fsr_attachment->pFragmentShadingRateAttachment) {
subpass->fsr_attachment = subpass_attachments++;
*subpass->fsr_attachment = (struct anv_subpass_attachment) {
.usage = VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR,
.attachment = fsr_attachment->pFragmentShadingRateAttachment->attachment,
.layout = fsr_attachment->pFragmentShadingRateAttachment->layout,
};
subpass->fsr_extent = fsr_attachment->shadingRateAttachmentTexelSize;
}
}
for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) {
@@ -498,8 +518,8 @@ anv_dynamic_pass_init_full(struct anv_dynamic_render_pass *dyn_render_pass,
const VkRenderingInfoKHR *info)
{
uint32_t att_count;
uint32_t color_count = 0, ds_count = 0;
uint32_t ds_idx;
uint32_t color_count = 0, ds_count = 0, fsr_count = 0;
uint32_t ds_idx, fsr_idx;
bool has_color_resolve, has_ds_resolve;
struct anv_render_pass *pass = &dyn_render_pass->pass;
@@ -514,6 +534,9 @@ anv_dynamic_pass_init_full(struct anv_dynamic_render_pass *dyn_render_pass,
dyn_render_pass->suspending = info->flags & VK_RENDERING_SUSPENDING_BIT_KHR;
dyn_render_pass->resuming = info->flags & VK_RENDERING_RESUMING_BIT_KHR;
/* Get the total attachment count by counting color, depth & fragment
* shading rate views.
*/
color_count = info->colorAttachmentCount;
if ((info->pDepthAttachment && info->pDepthAttachment->imageView) ||
(info->pStencilAttachment && info->pStencilAttachment->imageView))
@@ -538,8 +561,15 @@ anv_dynamic_pass_init_full(struct anv_dynamic_render_pass *dyn_render_pass,
if (has_ds_resolve)
ds_count *= 2;
att_count = color_count + ds_count;
const VkRenderingFragmentShadingRateAttachmentInfoKHR *fsr_attachment =
vk_find_struct_const(info->pNext,
RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR);
if (fsr_attachment && fsr_attachment->imageView != VK_NULL_HANDLE)
fsr_count = 1;
att_count = color_count + ds_count + fsr_count;
ds_idx = color_count;
fsr_idx = color_count + ds_count;
/* Setup pass & subpass */
*pass = (struct anv_render_pass) {
@@ -635,4 +665,20 @@ anv_dynamic_pass_init_full(struct anv_dynamic_render_pass *dyn_render_pass,
subpass->stencil_resolve_mode = stencil_resolve_mode;
}
}
if (fsr_count) {
ANV_FROM_HANDLE(anv_image_view, iview, fsr_attachment->imageView);
pass->attachments[fsr_idx] = (struct anv_render_pass_attachment) {
.format = iview->vk.format,
.samples = iview->vk.image->samples,
.usage = VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR,
};
*subpass->fsr_attachment = (struct anv_subpass_attachment) {
.usage = VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR,
.attachment = fsr_idx,
};
subpass->fsr_extent = fsr_attachment->shadingRateAttachmentTexelSize;
}
}

View File

@@ -324,8 +324,6 @@ void anv_DestroyPipeline(
if (gfx_pipeline->blend_state.map)
anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->blend_state);
if (gfx_pipeline->cps_state.map)
anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->cps_state);
for (unsigned s = 0; s < ARRAY_SIZE(gfx_pipeline->shaders); s++) {
if (gfx_pipeline->shaders[s])
@@ -2345,8 +2343,11 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
vk_find_struct_const(pCreateInfo->pNext,
PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR);
if (fsr_state) {
if (states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE)
dynamic->fragment_shading_rate = fsr_state->fragmentSize;
if (states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) {
dynamic->fragment_shading_rate.rate = fsr_state->fragmentSize;
memcpy(dynamic->fragment_shading_rate.ops, fsr_state->combinerOps,
sizeof(dynamic->fragment_shading_rate.ops));
}
}
pipeline->dynamic_state_mask = states;

View File

@@ -1194,6 +1194,16 @@ struct anv_device {
struct anv_state slice_hash;
/** An array of CPS_STATE structures grouped by MAX_VIEWPORTS elements
*
* We need to emit CPS_STATE structures for each viewport accessible by a
* pipeline. So rather than write many identical CPS_STATE structures
* dynamically, we can enumerate all possible combinations and then just
* emit a 3DSTATE_CPS_POINTERS instruction with the right offset into this
* array.
*/
struct anv_state cps_states;
uint32_t queue_count;
struct anv_queue * queues;
@@ -2696,7 +2706,10 @@ struct anv_dynamic_state {
VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS];
} sample_locations;
VkExtent2D fragment_shading_rate;
struct {
VkExtent2D rate;
VkFragmentShadingRateCombinerOpKHR ops[2];
} fragment_shading_rate;
VkCullModeFlags cull_mode;
VkFrontFace front_face;
@@ -2951,6 +2964,9 @@ struct anv_subpass {
VkResolveModeFlagBitsKHR depth_resolve_mode;
VkResolveModeFlagBitsKHR stencil_resolve_mode;
struct anv_subpass_attachment * fsr_attachment;
VkExtent2D fsr_extent;
uint32_t view_mask;
/** Subpass has a depth/stencil self-dependency */
@@ -2994,8 +3010,9 @@ struct anv_render_pass {
/* RTs * 2 (for resolve attachments)
* depth/stencil * 2
* fragment shading rate * 1
*/
#define MAX_DYN_RENDER_ATTACHMENTS (MAX_RTS * 2 + 2 * 2)
#define MAX_DYN_RENDER_ATTACHMENTS (MAX_RTS * 2 + 2 * 2 + 1)
/* And this, kids, is what we call a nasty hack. */
struct anv_dynamic_render_pass {
@@ -3271,9 +3288,15 @@ anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer);
struct anv_state
anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer);
const struct anv_image_view *
anv_cmd_buffer_get_first_color_view(const struct anv_cmd_buffer *cmd_buffer);
const struct anv_image_view *
anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer);
const struct anv_image_view *
anv_cmd_buffer_get_fsr_view(const struct anv_cmd_buffer *cmd_buffer);
VkResult
anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
uint32_t num_entries,
@@ -3499,8 +3522,6 @@ struct anv_graphics_pipeline {
struct anv_state blend_state;
struct anv_state cps_state;
uint32_t vb_used;
struct anv_pipeline_vertex_binding {
uint32_t stride;

View File

@@ -6011,6 +6011,42 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
cmd_buffer->state.hiz_enabled = isl_aux_usage_has_hiz(info.hiz_usage);
}
/* Emit the coarse pixel control buffer packet for the current subpass.
 *
 * Only does anything when built for GFX_VERx10 >= 125 and when the device
 * has KHR_fragment_shading_rate enabled. If the subpass has no fragment
 * shading rate view, the packet is emitted from a zero-initialized
 * isl_cpb_emit_info.
 */
static void
cmd_buffer_emit_cps_control_buffer(struct anv_cmd_buffer *cmd_buffer)
{
#if GFX_VERx10 >= 125
   struct anv_device *dev = cmd_buffer->device;

   if (!dev->vk.enabled_extensions.KHR_fragment_shading_rate)
      return;

   /* Reserve the packet space first; bail out if the batch could not
    * provide it.
    */
   uint32_t *packet =
      anv_batch_emit_dwords(&cmd_buffer->batch, dev->isl_dev.cpb.size / 4);
   if (packet == NULL)
      return;

   struct isl_cpb_emit_info emit_info = { };

   const struct anv_image_view *view = anv_cmd_buffer_get_fsr_view(cmd_buffer);
   if (view != NULL) {
      const struct anv_image *image = view->image;

      emit_info.view = &view->planes[0].isl;
      emit_info.surf = &image->planes[0].primary_surface.isl;

      /* The relocation is recorded at the address field inside the packet
       * that was just reserved.
       */
      emit_info.address =
         anv_batch_emit_reloc(&cmd_buffer->batch,
                              packet + dev->isl_dev.cpb.offset / 4,
                              image->bindings[0].address.bo,
                              image->bindings[0].address.offset +
                              image->bindings[0].memory_range.offset);
      emit_info.mocs =
         anv_mocs(dev, image->bindings[0].address.bo,
                  ISL_SURF_USAGE_CPB_BIT);
   }

   isl_emit_cpb_control_s(&dev->isl_dev, packet, &emit_info);
#endif /* GFX_VERx10 >= 125 */
}
/**
* This ANDs the view mask of the current subpass with the pending clear
* views in the attachment to get the mask of views active in the subpass
@@ -6269,12 +6305,13 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
continue;
assert(a < cmd_state->pass->attachment_count);
struct anv_subpass_attachment *att = &subpass->attachments[i];
struct anv_attachment_state *att_state = &cmd_state->attachments[a];
struct anv_image_view *iview = cmd_state->attachments[a].image_view;
struct anv_image_view *iview = att_state->image_view;
const struct anv_image *image = iview->image;
VkImageLayout target_layout = subpass->attachments[i].layout;
VkImageLayout target_layout = att->layout;
VkImageLayout target_stencil_layout =
subpass->attachments[i].stencil_layout;
@@ -6295,6 +6332,22 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
layer_count = fb->layers;
}
/* Treat the fragment shading rate attachment as color. But make sure we
* don't use fb->layers if the fragment shading rate attachment only has
* one layer.
*
* Vulkan spec 1.2.170 - VkFramebufferCreateInfo :
*
* "each element of pAttachments that is used as a fragment shading
* rate attachment by renderPass must have a layerCount that is
* either 1, or greater than layers"
*/
if ((att->usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR) &&
iview->planes[0].isl.array_len == 1) {
base_layer = 0;
layer_count = 1;
}
if (image->vk.aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
bool will_full_fast_clear =
(att_state->pending_clear_aspects & VK_IMAGE_ASPECT_COLOR_BIT) &&
@@ -6464,6 +6517,8 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
#endif
cmd_buffer_emit_depth_stencil(cmd_buffer);
cmd_buffer_emit_cps_control_buffer(cmd_buffer);
}
static enum blorp_filter
@@ -6820,8 +6875,9 @@ cmd_buffer_do_layout_transitions(struct anv_cmd_buffer *cmd_buffer,
continue;
assert(a < cmd_state->pass->attachment_count);
struct anv_subpass_attachment *att = &subpass->attachments[i];
struct anv_attachment_state *att_state = &cmd_state->attachments[a];
struct anv_image_view *iview = cmd_state->attachments[a].image_view;
struct anv_image_view *iview = att_state->image_view;
const struct anv_image *image = iview->image;
/* Transition the image into the final layout for this render pass */
@@ -6840,6 +6896,22 @@ cmd_buffer_do_layout_transitions(struct anv_cmd_buffer *cmd_buffer,
layer_count = fb->layers;
}
/* Treat the fragment shading rate attachment as color. But make sure we
* don't use fb->layers if the fragment shading rate attachment only has
* one layer.
*
* Vulkan spec 1.2.170 - VkFramebufferCreateInfo :
*
* "each element of pAttachments that is used as a fragment shading
* rate attachment by renderPass must have a layerCount that is
* either 1, or greater than layers"
*/
if (att->usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR &&
iview->planes[0].isl.array_len == 1) {
base_layer = 0;
layer_count = 1;
}
if (image->vk.aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
transition_color_buffer(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT,
@@ -7141,6 +7213,18 @@ genX(cmd_buffer_setup_attachments_dynrender)(struct anv_cmd_buffer *cmd_buffer,
}
}
if (subpass->fsr_attachment) {
const VkRenderingFragmentShadingRateAttachmentInfoKHR *fsr_att_info =
vk_find_struct_const(info->pNext,
RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR);
assert(fsr_att_info);
struct anv_attachment_state *fsr_att_state =
&state->attachments[subpass->fsr_attachment->attachment];
fsr_att_state->image_view =
anv_image_view_from_handle(fsr_att_info->imageView);
}
return VK_SUCCESS;
}
@@ -7251,6 +7335,8 @@ cmd_buffer_begin_rendering(struct anv_cmd_buffer *cmd_buffer,
#endif
cmd_buffer_emit_depth_stencil(cmd_buffer);
cmd_buffer_emit_cps_control_buffer(cmd_buffer);
}
static void

View File

@@ -847,22 +847,16 @@ emit_ms_state(struct anv_graphics_pipeline *pipeline,
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
sm.SampleMask = sample_mask;
}
}
pipeline->cps_state = ANV_STATE_NULL;
static void
emit_3dstate_cps(struct anv_graphics_pipeline *pipeline, uint32_t dynamic_states)
{
#if GFX_VER >= 11
if (!(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) &&
pipeline->base.device->vk.enabled_extensions.KHR_fragment_shading_rate) {
#if GFX_VER >= 12
struct anv_device *device = pipeline->base.device;
const uint32_t num_dwords =
GENX(CPS_STATE_length) * 4 * pipeline->dynamic_state.viewport.count;
pipeline->cps_state =
anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords, 32);
#endif
genX(emit_shading_rate)(&pipeline->base.batch,
pipeline,
pipeline->cps_state,
&pipeline->dynamic_state);
}
#endif
@@ -2407,6 +2401,12 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
ps.PixelShaderRequiresSourceDepthandorWPlaneCoefficients =
wm_prog_data->uses_depth_w_coefficients;
ps.PixelShaderIsPerCoarsePixel = wm_prog_data->per_coarse_pixel_dispatch;
#endif
#if GFX_VERx10 >= 125
/* TODO: We should only require this when the last geometry shader uses
* a fragment shading rate that is not constant.
*/
ps.EnablePSDependencyOnCPsizeChange = wm_prog_data->per_coarse_pixel_dispatch;
#endif
}
}
@@ -2612,6 +2612,8 @@ genX(graphics_pipeline_create)(
emit_3dstate_vf_statistics(pipeline);
emit_3dstate_cps(pipeline, dynamic_states);
emit_3dstate_streamout(pipeline, pCreateInfo->pRasterizationState,
dynamic_states);
}

View File

@@ -358,6 +358,111 @@ genX(init_device_state)(struct anv_device *device)
return res;
}
/* Loop header used to enumerate fragment shading rate combiner operators.
 *
 * On GFX_VERx10 >= 125 it expands to a for-loop declaring `name` and walking
 * every VkFragmentShadingRateCombinerOpKHR value from KEEP to MUL inclusive.
 * On other GFX_VER >= 12 builds it expands to nothing, so a brace block
 * following it is simply executed once. It is not defined below GFX_VER 12.
 */
#if GFX_VERx10 >= 125
#define maybe_for_each_shading_rate_op(name) \
for (VkFragmentShadingRateCombinerOpKHR name = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR; \
name <= VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MUL_KHR; \
name++)
#elif GFX_VER >= 12
#define maybe_for_each_shading_rate_op(name)
#endif
/* Pre-pack every CPS_STATE variant into device->cps_states so that state
 * emission only has to pick an offset, instead of re-packing the structure
 * each time the shading rate changes and for as many viewports as needed.
 *
 * The table is written as consecutive groups of MAX_VIEWPORTS identical
 * entries:
 *   - first, one group for the "disabled" state (constant mode, 1x1),
 *   - then one group per (combiner op 0, combiner op 1, width, height)
 *     tuple, with height varying fastest and sizes taken from {1, 2, 4}.
 *     The two combiner dimensions only exist when GFX_VERx10 >= 125.
 *
 * Does nothing below GFX_VER 12.
 */
void
genX(init_cps_device_state)(struct anv_device *device)
{
#if GFX_VER >= 12
   const uint32_t entry_size = GENX(CPS_STATE_length) * 4;
   uint8_t *cursor = device->cps_states.map;

   /* Disabled CPS mode */
   struct GENX(CPS_STATE) disabled = {
      .CoarsePixelShadingMode = CPS_MODE_CONSTANT,
      .MinCPSizeX = 1,
      .MinCPSizeY = 1,
#if GFX_VERx10 >= 125
      .Combiner0OpcodeforCPsize = PASSTHROUGH,
      .Combiner1OpcodeforCPsize = PASSTHROUGH,
#endif /* GFX_VERx10 >= 125 */
   };
   for (uint32_t vp = 0; vp < MAX_VIEWPORTS; vp++) {
      GENX(CPS_STATE_pack)(NULL, cursor, &disabled);
      cursor += entry_size;
   }

#if GFX_VERx10 >= 125
   /* Translation from the Vulkan combiner operator to the CPS_STATE
    * combiner opcode.
    */
   static const uint32_t hw_combiner[] = {
      [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR]    = PASSTHROUGH,
      [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR] = OVERRIDE,
      [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MIN_KHR]     = HIGH_QUALITY,
      [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_KHR]     = LOW_QUALITY,
      [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MUL_KHR]     = RELATIVE,
   };
#endif /* GFX_VERx10 >= 125 */

   maybe_for_each_shading_rate_op(op0) {
      maybe_for_each_shading_rate_op(op1) {
         /* Sizes 1, 2 and 4 are walked as shifts 0, 1 and 2. */
         for (uint32_t x_shift = 0; x_shift < 3; x_shift++) {
            for (uint32_t y_shift = 0; y_shift < 3; y_shift++) {
               struct GENX(CPS_STATE) enabled = {
                  .CoarsePixelShadingMode = CPS_MODE_CONSTANT,
                  .MinCPSizeX = 1u << x_shift,
                  .MinCPSizeY = 1u << y_shift,
#if GFX_VERx10 >= 125
                  .Combiner0OpcodeforCPsize = hw_combiner[op0],
                  .Combiner1OpcodeforCPsize = hw_combiner[op1],
#endif /* GFX_VERx10 >= 125 */
               };

               for (uint32_t vp = 0; vp < MAX_VIEWPORTS; vp++) {
                  GENX(CPS_STATE_pack)(NULL, cursor, &enabled);
                  cursor += entry_size;
               }
            }
         }
      }
   }
#endif /* GFX_VER >= 12 */
}
#if GFX_VER >= 12
/* Compute the offset of the pre-packed CPS_STATE group matching the given
 * dynamic state.
 *
 * When cps_enabled is false, the offset of the table itself is returned,
 * i.e. its first group. Otherwise the group is selected from the shading
 * rate width/height (and, on GFX_VERx10 >= 125, the two combiner operators),
 * skipping that first group. Each group spans MAX_VIEWPORTS CPS_STATE
 * entries.
 */
static uint32_t
get_cps_state_offset(struct anv_device *device, bool cps_enabled,
                     const struct anv_dynamic_state *d)
{
   /* Maps a coarse pixel size (1, 2 or 4) to its position in the table. */
   static const uint32_t size_index[] = {
      [1] = 0,
      [2] = 1,
      [4] = 2,
   };
   const uint32_t table_start = device->cps_states.offset;

   if (!cps_enabled)
      return table_start;

   uint32_t group =
      size_index[d->fragment_shading_rate.rate.width] * 3 +
      size_index[d->fragment_shading_rate.rate.height];

#if GFX_VERx10 >= 125
   group += d->fragment_shading_rate.ops[0] * 5 * 3 * 3 +
            d->fragment_shading_rate.ops[1] * 3 * 3;
#endif

   group += 1; /* skip disabled */

   return table_start + group * (MAX_VIEWPORTS * GENX(CPS_STATE_length) * 4);
}
#endif /* GFX_VER >= 12 */
void
genX(emit_l3_config)(struct anv_batch *batch,
const struct anv_device *device,
@@ -602,7 +707,6 @@ genX(emit_sample_pattern)(struct anv_batch *batch, uint32_t samples,
void
genX(emit_shading_rate)(struct anv_batch *batch,
const struct anv_graphics_pipeline *pipeline,
struct anv_state cps_states,
struct anv_dynamic_state *dynamic_state)
{
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
@@ -612,28 +716,34 @@ genX(emit_shading_rate)(struct anv_batch *batch,
anv_batch_emit(batch, GENX(3DSTATE_CPS), cps) {
cps.CoarsePixelShadingMode = cps_enable ? CPS_MODE_CONSTANT : CPS_MODE_NONE;
if (cps_enable) {
cps.MinCPSizeX = dynamic_state->fragment_shading_rate.width;
cps.MinCPSizeY = dynamic_state->fragment_shading_rate.height;
cps.MinCPSizeX = dynamic_state->fragment_shading_rate.rate.width;
cps.MinCPSizeY = dynamic_state->fragment_shading_rate.rate.height;
}
}
#elif GFX_VER == 12
for (uint32_t i = 0; i < dynamic_state->viewport.count; i++) {
uint32_t *cps_state_dwords =
cps_states.map + GENX(CPS_STATE_length) * 4 * i;
struct GENX(CPS_STATE) cps_state = {
.CoarsePixelShadingMode = cps_enable ? CPS_MODE_CONSTANT : CPS_MODE_NONE,
};
if (cps_enable) {
cps_state.MinCPSizeX = dynamic_state->fragment_shading_rate.width;
cps_state.MinCPSizeY = dynamic_state->fragment_shading_rate.height;
}
GENX(CPS_STATE_pack)(NULL, cps_state_dwords, &cps_state);
#elif GFX_VER >= 12
/* TODO: we can optimize this flush in the following cases:
*
* In the case where the last geometry shader emits a value that is not
* constant, we can avoid this stall because we can synchronize the
* pixel shader internally with
* 3DSTATE_PS::EnablePSDependencyOnCPsizeChange.
*
* If we know that the previous pipeline and the current one are using
* the same fragment shading rate.
*/
anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) {
#if GFX_VERx10 >= 125
pc.PSSStallSyncEnable = true;
#else
pc.PSDSyncEnable = true;
#endif
}
anv_batch_emit(batch, GENX(3DSTATE_CPS_POINTERS), cps) {
cps.CoarsePixelShadingStateArrayPointer = cps_states.offset;
struct anv_device *device = pipeline->base.device;
cps.CoarsePixelShadingStateArrayPointer =
get_cps_state_offset(device, cps_enable, dynamic_state);
}
#endif
}

View File

@@ -420,6 +420,13 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
struct anv_dynamic_state *d = &cmd_buffer->state.gfx.dynamic;
#if GFX_VER >= 11
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) {
genX(emit_shading_rate)(&cmd_buffer->batch, pipeline,
&cmd_buffer->state.gfx.dynamic);
}
#endif /* GFX_VER >= 11 */
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) {
uint32_t topology;
if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
@@ -803,23 +810,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
}
}
#if GFX_VER >= 11
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) {
struct anv_state cps_states = ANV_STATE_NULL;
#if GFX_VER >= 12
uint32_t count = cmd_buffer->state.gfx.dynamic.viewport.count;
cps_states =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
GENX(CPS_STATE_length) * 4 * count,
32);
#endif /* GFX_VER >= 12 */
genX(emit_shading_rate)(&cmd_buffer->batch, pipeline, cps_states,
&cmd_buffer->state.gfx.dynamic);
}
#endif /* GFX_VER >= 11 */
cmd_buffer->state.gfx.dirty = 0;
}