anv: rework sample location

On Gfx7 we can only give the sample location for a given multisample
number. This means everytime the multisampling value changes, we have
to re-emit the locations. It's fine because it's also where
(3DSTATE_MULTISAMPLE) the number of samples is stored.

On Gfx8+ though, 3DSTATE_MULTISAMPLE only holds the number of samples
and all the sample locations for all number of samples are located in
3DSTATE_SAMPLE_PATTERN. So to be more effecient there, we need to
track the locations for all sample numbers and compare new values with
the relevant sample count when touching the dynamic state.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16220>
This commit is contained in:
Lionel Landwerlin
2022-04-27 23:44:40 +03:00
committed by Marge Bot
parent 810518fda7
commit 168b13364f
9 changed files with 153 additions and 121 deletions

View File

@@ -107,6 +107,23 @@ const struct anv_dynamic_state default_dynamic_state = {
.logic_op = 0, .logic_op = 0,
}; };
void
anv_dynamic_state_init(struct anv_dynamic_state *state)
{
*state = default_dynamic_state;
#define INIT_LOCATIONS(idx) \
memcpy(state->sample_locations.locations_##idx, \
intel_sample_positions_##idx##x, \
sizeof(state->sample_locations.locations_##idx))
INIT_LOCATIONS(1);
INIT_LOCATIONS(2);
INIT_LOCATIONS(4);
INIT_LOCATIONS(8);
INIT_LOCATIONS(16);
#undef INIT_LOCATIONS
}
/** /**
* Copy the dynamic state from src to dest based on the copy_mask. * Copy the dynamic state from src to dest based on the copy_mask.
* *
@@ -199,10 +216,26 @@ anv_dynamic_state_copy(struct anv_dynamic_state *dest,
ANV_CMP_COPY(logic_op, ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP); ANV_CMP_COPY(logic_op, ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP);
if (copy_mask & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) { if (copy_mask & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) {
typed_memcpy(dest->sample_locations.locations, #define ANV_CMP_COPY_LOCATIONS(idx) \
src->sample_locations.locations, if (memcmp(dest->sample_locations.locations_##idx, \
ARRAY_SIZE(src->sample_locations.locations)); src->sample_locations.locations_##idx, \
changed |= ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS; sizeof(src->sample_locations.locations_##idx))) { \
typed_memcpy(dest->sample_locations.locations_##idx, \
src->sample_locations.locations_##idx, \
ARRAY_SIZE(src->sample_locations.locations_##idx)); \
changed |= ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS; \
}
switch (src->sample_locations.pipeline_samples) {
case 1: ANV_CMP_COPY_LOCATIONS(1); break;
case 2: ANV_CMP_COPY_LOCATIONS(2); break;
case 4: ANV_CMP_COPY_LOCATIONS(4); break;
case 8: ANV_CMP_COPY_LOCATIONS(8); break;
case 16: ANV_CMP_COPY_LOCATIONS(16); break;
default: unreachable("invalid sample count");
}
#undef ANV_CMP_COPY_LOCATIONS
} }
ANV_CMP_COPY(color_writes, ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE); ANV_CMP_COPY(color_writes, ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE);
@@ -226,7 +259,7 @@ anv_cmd_state_init(struct anv_cmd_buffer *cmd_buffer)
state->current_pipeline = UINT32_MAX; state->current_pipeline = UINT32_MAX;
state->restart_index = UINT32_MAX; state->restart_index = UINT32_MAX;
state->gfx.dynamic = default_dynamic_state; anv_dynamic_state_init(&state->gfx.dynamic);
} }
static void static void
@@ -855,11 +888,16 @@ void anv_CmdSetSampleLocationsEXT(
struct anv_dynamic_state *dyn_state = &cmd_buffer->state.gfx.dynamic; struct anv_dynamic_state *dyn_state = &cmd_buffer->state.gfx.dynamic;
uint32_t samples = pSampleLocationsInfo->sampleLocationsPerPixel; uint32_t samples = pSampleLocationsInfo->sampleLocationsPerPixel;
struct intel_sample_position *positions =
typed_memcpy(dyn_state->sample_locations.locations, anv_dynamic_state_get_sample_locations(dyn_state, samples);
pSampleLocationsInfo->pSampleLocations, samples); for (uint32_t i = 0; i < samples; i++) {
if (positions[i].x != pSampleLocationsInfo->pSampleLocations[i].x ||
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS; positions[i].y != pSampleLocationsInfo->pSampleLocations[i].y) {
positions[i].x = pSampleLocationsInfo->pSampleLocations[i].x;
positions[i].y = pSampleLocationsInfo->pSampleLocations[i].y;
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
}
}
} }
void anv_CmdSetLineStippleEXT( void anv_CmdSetLineStippleEXT(

View File

@@ -36,6 +36,8 @@
#error This file is included by means other than anv_private.h #error This file is included by means other than anv_private.h
#endif #endif
struct intel_sample_positions;
extern const uint32_t genX(vk_to_intel_cullmode)[]; extern const uint32_t genX(vk_to_intel_cullmode)[];
extern const uint32_t genX(vk_to_intel_front_face)[]; extern const uint32_t genX(vk_to_intel_front_face)[];
@@ -129,10 +131,10 @@ genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
enum intel_urb_deref_block_size *deref_block_size); enum intel_urb_deref_block_size *deref_block_size);
void genX(emit_multisample)(struct anv_batch *batch, uint32_t samples, void genX(emit_multisample)(struct anv_batch *batch, uint32_t samples,
const VkSampleLocationEXT *locations); const struct intel_sample_position *positions);
void genX(emit_sample_pattern)(struct anv_batch *batch, uint32_t samples, void genX(emit_sample_pattern)(struct anv_batch *batch,
const VkSampleLocationEXT *locations); const struct anv_dynamic_state *dynamic_state);
void genX(emit_shading_rate)(struct anv_batch *batch, void genX(emit_shading_rate)(struct anv_batch *batch,
const struct anv_graphics_pipeline *pipeline, const struct anv_graphics_pipeline *pipeline,

View File

@@ -2110,7 +2110,7 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
{ {
anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL; anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
pipeline->dynamic_state = default_dynamic_state; anv_dynamic_state_init(&pipeline->dynamic_state);
states &= ~pipeline->dynamic_states; states &= ~pipeline->dynamic_states;
@@ -2314,22 +2314,33 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
const VkPipelineSampleLocationsStateCreateInfoEXT *sl_info = ms_info ? const VkPipelineSampleLocationsStateCreateInfoEXT *sl_info = ms_info ?
vk_find_struct_const(ms_info, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT) : NULL; vk_find_struct_const(ms_info, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT) : NULL;
uint32_t samples = ms_info ? ms_info->rasterizationSamples : 1; uint32_t samples = MAX2(1, ms_info ? ms_info->rasterizationSamples : 1);
struct intel_sample_position *locations;
switch (samples) {
case 1: locations = dynamic->sample_locations.locations_1; break;
case 2: locations = dynamic->sample_locations.locations_2; break;
case 4: locations = dynamic->sample_locations.locations_4; break;
case 8: locations = dynamic->sample_locations.locations_8; break;
case 16: locations = dynamic->sample_locations.locations_16; break;
default: unreachable("invalid sample count");
}
if (sl_info) { if (sl_info) {
const VkSampleLocationEXT *positions = const VkSampleLocationEXT *positions =
sl_info->sampleLocationsInfo.pSampleLocations; sl_info->sampleLocationsInfo.pSampleLocations;
for (uint32_t i = 0; i < samples; i++) { for (uint32_t i = 0; i < samples; i++) {
dynamic->sample_locations.locations[i].x = positions[i].x; locations[i].x = positions[i].x;
dynamic->sample_locations.locations[i].y = positions[i].y; locations[i].y = positions[i].y;
} }
} else { } else {
const struct intel_sample_position *positions = const struct intel_sample_position *positions =
intel_get_sample_positions(samples); intel_get_sample_positions(samples);
for (uint32_t i = 0; i < samples; i++) { for (uint32_t i = 0; i < samples; i++) {
dynamic->sample_locations.locations[i].x = positions[i].x; locations[i].x = positions[i].x;
dynamic->sample_locations.locations[i].y = positions[i].y; locations[i].y = positions[i].y;
} }
} }
dynamic->sample_locations.pipeline_samples = samples;
} }
if (states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) { if (states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {

View File

@@ -49,6 +49,7 @@
#include "common/intel_gem.h" #include "common/intel_gem.h"
#include "common/intel_l3_config.h" #include "common/intel_l3_config.h"
#include "common/intel_measure.h" #include "common/intel_measure.h"
#include "common/intel_sample_positions.h"
#include "dev/intel_device_info.h" #include "dev/intel_device_info.h"
#include "blorp/blorp.h" #include "blorp/blorp.h"
#include "compiler/brw_compiler.h" #include "compiler/brw_compiler.h"
@@ -2707,7 +2708,13 @@ struct anv_dynamic_state {
} line_stipple; } line_stipple;
struct { struct {
VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS]; struct intel_sample_position locations_1[1];
struct intel_sample_position locations_2[2];
struct intel_sample_position locations_4[4];
struct intel_sample_position locations_8[8];
struct intel_sample_position locations_16[16];
/* Only valid on the pipeline dynamic state */
unsigned pipeline_samples;
} sample_locations; } sample_locations;
struct { struct {
@@ -2736,10 +2743,25 @@ struct anv_dynamic_state {
extern const struct anv_dynamic_state default_dynamic_state; extern const struct anv_dynamic_state default_dynamic_state;
void anv_dynamic_state_init(struct anv_dynamic_state *state);
uint32_t anv_dynamic_state_copy(struct anv_dynamic_state *dest, uint32_t anv_dynamic_state_copy(struct anv_dynamic_state *dest,
const struct anv_dynamic_state *src, const struct anv_dynamic_state *src,
uint32_t copy_mask); uint32_t copy_mask);
static inline struct intel_sample_position *
anv_dynamic_state_get_sample_locations(struct anv_dynamic_state *state,
unsigned samples)
{
switch (samples) {
case 1: return state->sample_locations.locations_1; break;
case 2: return state->sample_locations.locations_2; break;
case 4: return state->sample_locations.locations_4; break;
case 8: return state->sample_locations.locations_8; break;
case 16: return state->sample_locations.locations_16; break;
default: unreachable("invalid sample count");
}
}
struct anv_surface_state { struct anv_surface_state {
struct anv_state state; struct anv_state state;
/** Address of the surface referred to by this state /** Address of the surface referred to by this state

View File

@@ -1788,6 +1788,18 @@ genX(BeginCommandBuffer)(
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS; cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS;
} }
#if GFX_VER >= 8
/* Emit the sample pattern at the beginning of the batch because the
* default locations emitted at the device initialization might have been
* changed by a previous command buffer.
*
* Do not change that when we're continuing a previous renderpass.
*/
if (cmd_buffer->device->vk.enabled_extensions.EXT_sample_locations &&
!(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT))
genX(emit_sample_pattern)(&cmd_buffer->batch, NULL);
#endif
#if GFX_VERx10 >= 75 #if GFX_VERx10 >= 75
if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
const VkCommandBufferInheritanceConditionalRenderingInfoEXT *conditional_rendering_info = const VkCommandBufferInheritanceConditionalRenderingInfoEXT *conditional_rendering_info =

View File

@@ -929,24 +929,6 @@ emit_ms_state(struct anv_graphics_pipeline *pipeline,
NULL); NULL);
#endif #endif
/* If EXT_sample_locations is enabled and the sample locations are not
* dynamic, then we need to emit those position in the pipeline batch. On
* Gfx8+ this is part of 3DSTATE_SAMPLE_PATTERN, prior to that this is in
* 3DSTATE_MULTISAMPLE.
*/
if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations &&
!(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)) {
#if GFX_VER >= 8
genX(emit_sample_pattern)(&pipeline->base.batch,
pipeline->rasterization_samples,
pipeline->dynamic_state.sample_locations.locations);
#else
genX(emit_multisample)(&pipeline->base.batch,
pipeline->rasterization_samples,
pipeline->dynamic_state.sample_locations.locations);
#endif
}
/* From the Vulkan 1.0 spec: /* From the Vulkan 1.0 spec:
* If pSampleMask is NULL, it is treated as if the mask has all bits * If pSampleMask is NULL, it is treated as if the mask has all bits
* enabled, i.e. no coverage is removed from fragments. * enabled, i.e. no coverage is removed from fragments.

View File

@@ -256,7 +256,7 @@ init_render_queue_state(struct anv_queue *queue)
#if GFX_VER >= 8 #if GFX_VER >= 8
anv_batch_emit(&batch, GENX(3DSTATE_WM_CHROMAKEY), ck); anv_batch_emit(&batch, GENX(3DSTATE_WM_CHROMAKEY), ck);
genX(emit_sample_pattern)(&batch, 0, NULL); genX(emit_sample_pattern)(&batch, NULL);
/* The BDW+ docs describe how to use the 3DSTATE_WM_HZ_OP instruction in the /* The BDW+ docs describe how to use the 3DSTATE_WM_HZ_OP instruction in the
* section titled, "Optimized Depth Buffer Clear and/or Stencil Buffer * section titled, "Optimized Depth Buffer Clear and/or Stencil Buffer
@@ -646,7 +646,7 @@ genX(emit_l3_config)(struct anv_batch *batch,
void void
genX(emit_multisample)(struct anv_batch *batch, uint32_t samples, genX(emit_multisample)(struct anv_batch *batch, uint32_t samples,
const VkSampleLocationEXT *locations) const struct intel_sample_position *positions)
{ {
anv_batch_emit(batch, GENX(3DSTATE_MULTISAMPLE), ms) { anv_batch_emit(batch, GENX(3DSTATE_MULTISAMPLE), ms) {
ms.NumberofMultisamples = __builtin_ffs(samples) - 1; ms.NumberofMultisamples = __builtin_ffs(samples) - 1;
@@ -660,41 +660,21 @@ genX(emit_multisample)(struct anv_batch *batch, uint32_t samples,
*/ */
ms.PixelPositionOffsetEnable = false; ms.PixelPositionOffsetEnable = false;
#else #else
switch (samples) {
if (locations) { case 1:
switch (samples) { INTEL_SAMPLE_POS_1X_ARRAY(ms.Sample, positions);
case 1: break;
INTEL_SAMPLE_POS_1X_ARRAY(ms.Sample, locations); case 2:
INTEL_SAMPLE_POS_2X_ARRAY(ms.Sample, positions);
break;
case 4:
INTEL_SAMPLE_POS_4X_ARRAY(ms.Sample, positions);
break;
case 8:
INTEL_SAMPLE_POS_8X_ARRAY(ms.Sample, positions);
break;
default:
break; break;
case 2:
INTEL_SAMPLE_POS_2X_ARRAY(ms.Sample, locations);
break;
case 4:
INTEL_SAMPLE_POS_4X_ARRAY(ms.Sample, locations);
break;
case 8:
INTEL_SAMPLE_POS_8X_ARRAY(ms.Sample, locations);
break;
default:
break;
}
} else {
switch (samples) {
case 1:
INTEL_SAMPLE_POS_1X(ms.Sample);
break;
case 2:
INTEL_SAMPLE_POS_2X(ms.Sample);
break;
case 4:
INTEL_SAMPLE_POS_4X(ms.Sample);
break;
case 8:
INTEL_SAMPLE_POS_8X(ms.Sample);
break;
default:
break;
}
} }
#endif #endif
} }
@@ -702,53 +682,38 @@ genX(emit_multisample)(struct anv_batch *batch, uint32_t samples,
#if GFX_VER >= 8 #if GFX_VER >= 8
void void
genX(emit_sample_pattern)(struct anv_batch *batch, uint32_t samples, genX(emit_sample_pattern)(struct anv_batch *batch,
const VkSampleLocationEXT *locations) const struct anv_dynamic_state *d)
{ {
/* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and /* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and
* VkPhysicalDeviceFeatures::standardSampleLocations. * VkPhysicalDeviceFeatures::standardSampleLocations.
*/ */
anv_batch_emit(batch, GENX(3DSTATE_SAMPLE_PATTERN), sp) { anv_batch_emit(batch, GENX(3DSTATE_SAMPLE_PATTERN), sp) {
if (locations) { /* The Skylake PRM Vol. 2a "3DSTATE_SAMPLE_PATTERN" says:
/* The Skylake PRM Vol. 2a "3DSTATE_SAMPLE_PATTERN" says: *
* * "When programming the sample offsets (for NUMSAMPLES_4 or _8
* "When programming the sample offsets (for NUMSAMPLES_4 or _8 * and MSRASTMODE_xxx_PATTERN), the order of the samples 0 to 3
* and MSRASTMODE_xxx_PATTERN), the order of the samples 0 to 3 * (or 7 for 8X, or 15 for 16X) must have monotonically increasing
* (or 7 for 8X, or 15 for 16X) must have monotonically increasing * distance from the pixel center. This is required to get the
* distance from the pixel center. This is required to get the * correct centroid computation in the device."
* correct centroid computation in the device." *
* * However, the Vulkan spec seems to require that the the samples occur
* However, the Vulkan spec seems to require that the the samples * in the order provided through the API. The standard sample patterns
* occur in the order provided through the API. The standard sample * have the above property that they have monotonically increasing
* patterns have the above property that they have monotonically * distances from the center but client-provided ones do not. As long as
* increasing distances from the center but client-provided ones do * this only affects centroid calculations as the docs say, we should be
* not. As long as this only affects centroid calculations as the * ok because OpenGL and Vulkan only require that the centroid be some
* docs say, we should be ok because OpenGL and Vulkan only require * lit sample and that it's the same for all samples in a pixel; they
* that the centroid be some lit sample and that it's the same for * have no requirement that it be the one closest to center.
* all samples in a pixel; they have no requirement that it be the */
* one closest to center. if (d) {
*/ INTEL_SAMPLE_POS_1X_ARRAY(sp._1xSample, d->sample_locations.locations_1);
switch (samples) { INTEL_SAMPLE_POS_2X_ARRAY(sp._2xSample, d->sample_locations.locations_2);
case 1: INTEL_SAMPLE_POS_4X_ARRAY(sp._4xSample, d->sample_locations.locations_4);
INTEL_SAMPLE_POS_1X_ARRAY(sp._1xSample, locations); INTEL_SAMPLE_POS_8X_ARRAY(sp._8xSample, d->sample_locations.locations_8);
break;
case 2:
INTEL_SAMPLE_POS_2X_ARRAY(sp._2xSample, locations);
break;
case 4:
INTEL_SAMPLE_POS_4X_ARRAY(sp._4xSample, locations);
break;
case 8:
INTEL_SAMPLE_POS_8X_ARRAY(sp._8xSample, locations);
break;
#if GFX_VER >= 9 #if GFX_VER >= 9
case 16: INTEL_SAMPLE_POS_16X_ARRAY(sp._16xSample, d->sample_locations.locations_16);
INTEL_SAMPLE_POS_16X_ARRAY(sp._16xSample, locations);
break;
#endif #endif
default:
break;
}
} else { } else {
INTEL_SAMPLE_POS_1X(sp._1xSample); INTEL_SAMPLE_POS_1X(sp._1xSample);
INTEL_SAMPLE_POS_2X(sp._2xSample); INTEL_SAMPLE_POS_2X(sp._2xSample);

View File

@@ -301,10 +301,12 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx7.wm); anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx7.wm);
} }
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) { if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)) {
genX(emit_multisample)(&cmd_buffer->batch, genX(emit_multisample)(&cmd_buffer->batch,
pipeline->rasterization_samples, pipeline->rasterization_samples,
d->sample_locations.locations); anv_dynamic_state_get_sample_locations(d,
pipeline->rasterization_samples));
} }
if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE | if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |

View File

@@ -622,11 +622,9 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
} }
#endif #endif
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) { if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations &&
genX(emit_sample_pattern)(&cmd_buffer->batch, cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)
pipeline->rasterization_samples, genX(emit_sample_pattern)(&cmd_buffer->batch, d);
d->sample_locations.locations);
}
if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE | if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE | ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |