anv: rework sample location

On Gfx7 we can only give the sample locations for a given multisample
count. This means every time the multisampling value changes, we have
to re-emit the locations. That's fine because the same packet
(3DSTATE_MULTISAMPLE) is also where the number of samples is stored.

On Gfx8+ though, 3DSTATE_MULTISAMPLE only holds the number of samples
and the sample locations for all sample counts are located in
3DSTATE_SAMPLE_PATTERN. So to be more efficient there, we need to
track the locations for all sample counts and compare new values with
the relevant sample count when touching the dynamic state.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16220>
This commit is contained in:
Lionel Landwerlin
2022-04-27 23:44:40 +03:00
committed by Marge Bot
parent 810518fda7
commit 168b13364f
9 changed files with 153 additions and 121 deletions

View File

@@ -107,6 +107,23 @@ const struct anv_dynamic_state default_dynamic_state = {
.logic_op = 0,
};
/**
 * Initialize a dynamic state structure.
 *
 * The structure is first overwritten with default_dynamic_state, then each
 * per-sample-count location array (1x, 2x, 4x, 8x and 16x) is filled from
 * the matching intel_sample_positions_<N>x table.
 */
void
anv_dynamic_state_init(struct anv_dynamic_state *state)
{
   *state = default_dynamic_state;

   /* The copy size is taken from the destination array, so each call copies
    * exactly as many entries as the locations_<N> member holds.
    */
#define SEED_DEFAULT_LOCATIONS(count)                                   \
   memcpy(state->sample_locations.locations_##count,                    \
          intel_sample_positions_##count##x,                            \
          sizeof(state->sample_locations.locations_##count))

   SEED_DEFAULT_LOCATIONS(1);
   SEED_DEFAULT_LOCATIONS(2);
   SEED_DEFAULT_LOCATIONS(4);
   SEED_DEFAULT_LOCATIONS(8);
   SEED_DEFAULT_LOCATIONS(16);

#undef SEED_DEFAULT_LOCATIONS
}
/**
* Copy the dynamic state from src to dest based on the copy_mask.
*
@@ -199,10 +216,26 @@ anv_dynamic_state_copy(struct anv_dynamic_state *dest,
ANV_CMP_COPY(logic_op, ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP);
if (copy_mask & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) {
typed_memcpy(dest->sample_locations.locations,
src->sample_locations.locations,
ARRAY_SIZE(src->sample_locations.locations));
changed |= ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
#define ANV_CMP_COPY_LOCATIONS(idx) \
if (memcmp(dest->sample_locations.locations_##idx, \
src->sample_locations.locations_##idx, \
sizeof(src->sample_locations.locations_##idx))) { \
typed_memcpy(dest->sample_locations.locations_##idx, \
src->sample_locations.locations_##idx, \
ARRAY_SIZE(src->sample_locations.locations_##idx)); \
changed |= ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS; \
}
switch (src->sample_locations.pipeline_samples) {
case 1: ANV_CMP_COPY_LOCATIONS(1); break;
case 2: ANV_CMP_COPY_LOCATIONS(2); break;
case 4: ANV_CMP_COPY_LOCATIONS(4); break;
case 8: ANV_CMP_COPY_LOCATIONS(8); break;
case 16: ANV_CMP_COPY_LOCATIONS(16); break;
default: unreachable("invalid sample count");
}
#undef ANV_CMP_COPY_LOCATIONS
}
ANV_CMP_COPY(color_writes, ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE);
@@ -226,7 +259,7 @@ anv_cmd_state_init(struct anv_cmd_buffer *cmd_buffer)
state->current_pipeline = UINT32_MAX;
state->restart_index = UINT32_MAX;
state->gfx.dynamic = default_dynamic_state;
anv_dynamic_state_init(&state->gfx.dynamic);
}
static void
@@ -855,11 +888,16 @@ void anv_CmdSetSampleLocationsEXT(
struct anv_dynamic_state *dyn_state = &cmd_buffer->state.gfx.dynamic;
uint32_t samples = pSampleLocationsInfo->sampleLocationsPerPixel;
typed_memcpy(dyn_state->sample_locations.locations,
pSampleLocationsInfo->pSampleLocations, samples);
struct intel_sample_position *positions =
anv_dynamic_state_get_sample_locations(dyn_state, samples);
for (uint32_t i = 0; i < samples; i++) {
if (positions[i].x != pSampleLocationsInfo->pSampleLocations[i].x ||
positions[i].y != pSampleLocationsInfo->pSampleLocations[i].y) {
positions[i].x = pSampleLocationsInfo->pSampleLocations[i].x;
positions[i].y = pSampleLocationsInfo->pSampleLocations[i].y;
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
}
}
}
void anv_CmdSetLineStippleEXT(

View File

@@ -36,6 +36,8 @@
#error This file is included by means other than anv_private.h
#endif
struct intel_sample_positions;
extern const uint32_t genX(vk_to_intel_cullmode)[];
extern const uint32_t genX(vk_to_intel_front_face)[];
@@ -129,10 +131,10 @@ genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
enum intel_urb_deref_block_size *deref_block_size);
void genX(emit_multisample)(struct anv_batch *batch, uint32_t samples,
const VkSampleLocationEXT *locations);
const struct intel_sample_position *positions);
void genX(emit_sample_pattern)(struct anv_batch *batch, uint32_t samples,
const VkSampleLocationEXT *locations);
void genX(emit_sample_pattern)(struct anv_batch *batch,
const struct anv_dynamic_state *dynamic_state);
void genX(emit_shading_rate)(struct anv_batch *batch,
const struct anv_graphics_pipeline *pipeline,

View File

@@ -2110,7 +2110,7 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
{
anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
pipeline->dynamic_state = default_dynamic_state;
anv_dynamic_state_init(&pipeline->dynamic_state);
states &= ~pipeline->dynamic_states;
@@ -2314,22 +2314,33 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
const VkPipelineSampleLocationsStateCreateInfoEXT *sl_info = ms_info ?
vk_find_struct_const(ms_info, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT) : NULL;
uint32_t samples = ms_info ? ms_info->rasterizationSamples : 1;
uint32_t samples = MAX2(1, ms_info ? ms_info->rasterizationSamples : 1);
struct intel_sample_position *locations;
switch (samples) {
case 1: locations = dynamic->sample_locations.locations_1; break;
case 2: locations = dynamic->sample_locations.locations_2; break;
case 4: locations = dynamic->sample_locations.locations_4; break;
case 8: locations = dynamic->sample_locations.locations_8; break;
case 16: locations = dynamic->sample_locations.locations_16; break;
default: unreachable("invalid sample count");
}
if (sl_info) {
const VkSampleLocationEXT *positions =
sl_info->sampleLocationsInfo.pSampleLocations;
for (uint32_t i = 0; i < samples; i++) {
dynamic->sample_locations.locations[i].x = positions[i].x;
dynamic->sample_locations.locations[i].y = positions[i].y;
locations[i].x = positions[i].x;
locations[i].y = positions[i].y;
}
} else {
const struct intel_sample_position *positions =
intel_get_sample_positions(samples);
for (uint32_t i = 0; i < samples; i++) {
dynamic->sample_locations.locations[i].x = positions[i].x;
dynamic->sample_locations.locations[i].y = positions[i].y;
locations[i].x = positions[i].x;
locations[i].y = positions[i].y;
}
}
dynamic->sample_locations.pipeline_samples = samples;
}
if (states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {

View File

@@ -49,6 +49,7 @@
#include "common/intel_gem.h"
#include "common/intel_l3_config.h"
#include "common/intel_measure.h"
#include "common/intel_sample_positions.h"
#include "dev/intel_device_info.h"
#include "blorp/blorp.h"
#include "compiler/brw_compiler.h"
@@ -2707,7 +2708,13 @@ struct anv_dynamic_state {
} line_stipple;
struct {
VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS];
struct intel_sample_position locations_1[1];
struct intel_sample_position locations_2[2];
struct intel_sample_position locations_4[4];
struct intel_sample_position locations_8[8];
struct intel_sample_position locations_16[16];
/* Only valid on the pipeline dynamic state */
unsigned pipeline_samples;
} sample_locations;
struct {
@@ -2736,10 +2743,25 @@ struct anv_dynamic_state {
extern const struct anv_dynamic_state default_dynamic_state;
void anv_dynamic_state_init(struct anv_dynamic_state *state);
uint32_t anv_dynamic_state_copy(struct anv_dynamic_state *dest,
const struct anv_dynamic_state *src,
uint32_t copy_mask);
/**
 * Return the sample location array of a dynamic state that corresponds to
 * the given sample count.
 *
 * \param state    dynamic state holding one location array per sample count
 * \param samples  sample count; must be 1, 2, 4, 8 or 16, any other value
 *                 hits unreachable()
 *
 * \return pointer to the first element of the locations_<samples> array
 */
static inline struct intel_sample_position *
anv_dynamic_state_get_sample_locations(struct anv_dynamic_state *state,
                                       unsigned samples)
{
   switch (samples) {
   case 1:
      return state->sample_locations.locations_1;
   case 2:
      return state->sample_locations.locations_2;
   case 4:
      return state->sample_locations.locations_4;
   case 8:
      return state->sample_locations.locations_8;
   case 16:
      return state->sample_locations.locations_16;
   default:
      unreachable("invalid sample count");
   }
}
struct anv_surface_state {
struct anv_state state;
/** Address of the surface referred to by this state

View File

@@ -1788,6 +1788,18 @@ genX(BeginCommandBuffer)(
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS;
}
#if GFX_VER >= 8
/* Emit the sample pattern at the beginning of the batch because the
* default locations emitted at the device initialization might have been
* changed by a previous command buffer.
*
* Do not change that when we're continuing a previous renderpass.
*/
if (cmd_buffer->device->vk.enabled_extensions.EXT_sample_locations &&
!(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT))
genX(emit_sample_pattern)(&cmd_buffer->batch, NULL);
#endif
#if GFX_VERx10 >= 75
if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
const VkCommandBufferInheritanceConditionalRenderingInfoEXT *conditional_rendering_info =

View File

@@ -929,24 +929,6 @@ emit_ms_state(struct anv_graphics_pipeline *pipeline,
NULL);
#endif
/* If EXT_sample_locations is enabled and the sample locations are not
* dynamic, then we need to emit those position in the pipeline batch. On
* Gfx8+ this is part of 3DSTATE_SAMPLE_PATTERN, prior to that this is in
* 3DSTATE_MULTISAMPLE.
*/
if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations &&
!(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)) {
#if GFX_VER >= 8
genX(emit_sample_pattern)(&pipeline->base.batch,
pipeline->rasterization_samples,
pipeline->dynamic_state.sample_locations.locations);
#else
genX(emit_multisample)(&pipeline->base.batch,
pipeline->rasterization_samples,
pipeline->dynamic_state.sample_locations.locations);
#endif
}
/* From the Vulkan 1.0 spec:
* If pSampleMask is NULL, it is treated as if the mask has all bits
* enabled, i.e. no coverage is removed from fragments.

View File

@@ -256,7 +256,7 @@ init_render_queue_state(struct anv_queue *queue)
#if GFX_VER >= 8
anv_batch_emit(&batch, GENX(3DSTATE_WM_CHROMAKEY), ck);
genX(emit_sample_pattern)(&batch, 0, NULL);
genX(emit_sample_pattern)(&batch, NULL);
/* The BDW+ docs describe how to use the 3DSTATE_WM_HZ_OP instruction in the
* section titled, "Optimized Depth Buffer Clear and/or Stencil Buffer
@@ -646,7 +646,7 @@ genX(emit_l3_config)(struct anv_batch *batch,
void
genX(emit_multisample)(struct anv_batch *batch, uint32_t samples,
const VkSampleLocationEXT *locations)
const struct intel_sample_position *positions)
{
anv_batch_emit(batch, GENX(3DSTATE_MULTISAMPLE), ms) {
ms.NumberofMultisamples = __builtin_ffs(samples) - 1;
@@ -660,56 +660,35 @@ genX(emit_multisample)(struct anv_batch *batch, uint32_t samples,
*/
ms.PixelPositionOffsetEnable = false;
#else
if (locations) {
switch (samples) {
case 1:
INTEL_SAMPLE_POS_1X_ARRAY(ms.Sample, locations);
INTEL_SAMPLE_POS_1X_ARRAY(ms.Sample, positions);
break;
case 2:
INTEL_SAMPLE_POS_2X_ARRAY(ms.Sample, locations);
INTEL_SAMPLE_POS_2X_ARRAY(ms.Sample, positions);
break;
case 4:
INTEL_SAMPLE_POS_4X_ARRAY(ms.Sample, locations);
INTEL_SAMPLE_POS_4X_ARRAY(ms.Sample, positions);
break;
case 8:
INTEL_SAMPLE_POS_8X_ARRAY(ms.Sample, locations);
INTEL_SAMPLE_POS_8X_ARRAY(ms.Sample, positions);
break;
default:
break;
}
} else {
switch (samples) {
case 1:
INTEL_SAMPLE_POS_1X(ms.Sample);
break;
case 2:
INTEL_SAMPLE_POS_2X(ms.Sample);
break;
case 4:
INTEL_SAMPLE_POS_4X(ms.Sample);
break;
case 8:
INTEL_SAMPLE_POS_8X(ms.Sample);
break;
default:
break;
}
}
#endif
}
}
#if GFX_VER >= 8
void
genX(emit_sample_pattern)(struct anv_batch *batch, uint32_t samples,
const VkSampleLocationEXT *locations)
genX(emit_sample_pattern)(struct anv_batch *batch,
const struct anv_dynamic_state *d)
{
/* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and
* VkPhysicalDeviceFeatures::standardSampleLocations.
*/
anv_batch_emit(batch, GENX(3DSTATE_SAMPLE_PATTERN), sp) {
if (locations) {
/* The Skylake PRM Vol. 2a "3DSTATE_SAMPLE_PATTERN" says:
*
* "When programming the sample offsets (for NUMSAMPLES_4 or _8
@@ -718,37 +697,23 @@ genX(emit_sample_pattern)(struct anv_batch *batch, uint32_t samples,
* distance from the pixel center. This is required to get the
* correct centroid computation in the device."
*
* However, the Vulkan spec seems to require that the samples
* occur in the order provided through the API. The standard sample
* patterns have the above property that they have monotonically
* increasing distances from the center but client-provided ones do
* not. As long as this only affects centroid calculations as the
* docs say, we should be ok because OpenGL and Vulkan only require
* that the centroid be some lit sample and that it's the same for
* all samples in a pixel; they have no requirement that it be the
* one closest to center.
* However, the Vulkan spec seems to require that the samples occur
* in the order provided through the API. The standard sample patterns
* have the above property that they have monotonically increasing
* distances from the center but client-provided ones do not. As long as
* this only affects centroid calculations as the docs say, we should be
* ok because OpenGL and Vulkan only require that the centroid be some
* lit sample and that it's the same for all samples in a pixel; they
* have no requirement that it be the one closest to center.
*/
switch (samples) {
case 1:
INTEL_SAMPLE_POS_1X_ARRAY(sp._1xSample, locations);
break;
case 2:
INTEL_SAMPLE_POS_2X_ARRAY(sp._2xSample, locations);
break;
case 4:
INTEL_SAMPLE_POS_4X_ARRAY(sp._4xSample, locations);
break;
case 8:
INTEL_SAMPLE_POS_8X_ARRAY(sp._8xSample, locations);
break;
if (d) {
INTEL_SAMPLE_POS_1X_ARRAY(sp._1xSample, d->sample_locations.locations_1);
INTEL_SAMPLE_POS_2X_ARRAY(sp._2xSample, d->sample_locations.locations_2);
INTEL_SAMPLE_POS_4X_ARRAY(sp._4xSample, d->sample_locations.locations_4);
INTEL_SAMPLE_POS_8X_ARRAY(sp._8xSample, d->sample_locations.locations_8);
#if GFX_VER >= 9
case 16:
INTEL_SAMPLE_POS_16X_ARRAY(sp._16xSample, locations);
break;
INTEL_SAMPLE_POS_16X_ARRAY(sp._16xSample, d->sample_locations.locations_16);
#endif
default:
break;
}
} else {
INTEL_SAMPLE_POS_1X(sp._1xSample);
INTEL_SAMPLE_POS_2X(sp._2xSample);

View File

@@ -301,10 +301,12 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx7.wm);
}
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) {
if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)) {
genX(emit_multisample)(&cmd_buffer->batch,
pipeline->rasterization_samples,
d->sample_locations.locations);
anv_dynamic_state_get_sample_locations(d,
pipeline->rasterization_samples));
}
if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |

View File

@@ -622,11 +622,9 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
}
#endif
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) {
genX(emit_sample_pattern)(&cmd_buffer->batch,
pipeline->rasterization_samples,
d->sample_locations.locations);
}
if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations &&
cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)
genX(emit_sample_pattern)(&cmd_buffer->batch, d);
if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |