anv: rework sample location
On Gfx7 we can only give the sample location for a given multisample number. This means everytime the multisampling value changes, we have to re-emit the locations. It's fine because it's also where (3DSTATE_MULTISAMPLE) the number of samples is stored. On Gfx8+ though, 3DSTATE_MULTISAMPLE only holds the number of samples and all the sample locations for all number of samples are located in 3DSTATE_SAMPLE_PATTERN. So to be more effecient there, we need to track the locations for all sample numbers and compare new values with the relevant sample count when touching the dynamic state. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16220>
This commit is contained in:

committed by
Marge Bot

parent
810518fda7
commit
168b13364f
@@ -107,6 +107,23 @@ const struct anv_dynamic_state default_dynamic_state = {
|
||||
.logic_op = 0,
|
||||
};
|
||||
|
||||
void
|
||||
anv_dynamic_state_init(struct anv_dynamic_state *state)
|
||||
{
|
||||
*state = default_dynamic_state;
|
||||
|
||||
#define INIT_LOCATIONS(idx) \
|
||||
memcpy(state->sample_locations.locations_##idx, \
|
||||
intel_sample_positions_##idx##x, \
|
||||
sizeof(state->sample_locations.locations_##idx))
|
||||
INIT_LOCATIONS(1);
|
||||
INIT_LOCATIONS(2);
|
||||
INIT_LOCATIONS(4);
|
||||
INIT_LOCATIONS(8);
|
||||
INIT_LOCATIONS(16);
|
||||
#undef INIT_LOCATIONS
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy the dynamic state from src to dest based on the copy_mask.
|
||||
*
|
||||
@@ -199,10 +216,26 @@ anv_dynamic_state_copy(struct anv_dynamic_state *dest,
|
||||
ANV_CMP_COPY(logic_op, ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP);
|
||||
|
||||
if (copy_mask & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) {
|
||||
typed_memcpy(dest->sample_locations.locations,
|
||||
src->sample_locations.locations,
|
||||
ARRAY_SIZE(src->sample_locations.locations));
|
||||
changed |= ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
|
||||
#define ANV_CMP_COPY_LOCATIONS(idx) \
|
||||
if (memcmp(dest->sample_locations.locations_##idx, \
|
||||
src->sample_locations.locations_##idx, \
|
||||
sizeof(src->sample_locations.locations_##idx))) { \
|
||||
typed_memcpy(dest->sample_locations.locations_##idx, \
|
||||
src->sample_locations.locations_##idx, \
|
||||
ARRAY_SIZE(src->sample_locations.locations_##idx)); \
|
||||
changed |= ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS; \
|
||||
}
|
||||
|
||||
switch (src->sample_locations.pipeline_samples) {
|
||||
case 1: ANV_CMP_COPY_LOCATIONS(1); break;
|
||||
case 2: ANV_CMP_COPY_LOCATIONS(2); break;
|
||||
case 4: ANV_CMP_COPY_LOCATIONS(4); break;
|
||||
case 8: ANV_CMP_COPY_LOCATIONS(8); break;
|
||||
case 16: ANV_CMP_COPY_LOCATIONS(16); break;
|
||||
default: unreachable("invalid sample count");
|
||||
}
|
||||
|
||||
#undef ANV_CMP_COPY_LOCATIONS
|
||||
}
|
||||
|
||||
ANV_CMP_COPY(color_writes, ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE);
|
||||
@@ -226,7 +259,7 @@ anv_cmd_state_init(struct anv_cmd_buffer *cmd_buffer)
|
||||
|
||||
state->current_pipeline = UINT32_MAX;
|
||||
state->restart_index = UINT32_MAX;
|
||||
state->gfx.dynamic = default_dynamic_state;
|
||||
anv_dynamic_state_init(&state->gfx.dynamic);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -855,11 +888,16 @@ void anv_CmdSetSampleLocationsEXT(
|
||||
|
||||
struct anv_dynamic_state *dyn_state = &cmd_buffer->state.gfx.dynamic;
|
||||
uint32_t samples = pSampleLocationsInfo->sampleLocationsPerPixel;
|
||||
|
||||
typed_memcpy(dyn_state->sample_locations.locations,
|
||||
pSampleLocationsInfo->pSampleLocations, samples);
|
||||
|
||||
struct intel_sample_position *positions =
|
||||
anv_dynamic_state_get_sample_locations(dyn_state, samples);
|
||||
for (uint32_t i = 0; i < samples; i++) {
|
||||
if (positions[i].x != pSampleLocationsInfo->pSampleLocations[i].x ||
|
||||
positions[i].y != pSampleLocationsInfo->pSampleLocations[i].y) {
|
||||
positions[i].x = pSampleLocationsInfo->pSampleLocations[i].x;
|
||||
positions[i].y = pSampleLocationsInfo->pSampleLocations[i].y;
|
||||
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void anv_CmdSetLineStippleEXT(
|
||||
|
@@ -36,6 +36,8 @@
|
||||
#error This file is included by means other than anv_private.h
|
||||
#endif
|
||||
|
||||
struct intel_sample_positions;
|
||||
|
||||
extern const uint32_t genX(vk_to_intel_cullmode)[];
|
||||
|
||||
extern const uint32_t genX(vk_to_intel_front_face)[];
|
||||
@@ -129,10 +131,10 @@ genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
|
||||
enum intel_urb_deref_block_size *deref_block_size);
|
||||
|
||||
void genX(emit_multisample)(struct anv_batch *batch, uint32_t samples,
|
||||
const VkSampleLocationEXT *locations);
|
||||
const struct intel_sample_position *positions);
|
||||
|
||||
void genX(emit_sample_pattern)(struct anv_batch *batch, uint32_t samples,
|
||||
const VkSampleLocationEXT *locations);
|
||||
void genX(emit_sample_pattern)(struct anv_batch *batch,
|
||||
const struct anv_dynamic_state *dynamic_state);
|
||||
|
||||
void genX(emit_shading_rate)(struct anv_batch *batch,
|
||||
const struct anv_graphics_pipeline *pipeline,
|
||||
|
@@ -2110,7 +2110,7 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
|
||||
{
|
||||
anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
|
||||
|
||||
pipeline->dynamic_state = default_dynamic_state;
|
||||
anv_dynamic_state_init(&pipeline->dynamic_state);
|
||||
|
||||
states &= ~pipeline->dynamic_states;
|
||||
|
||||
@@ -2314,22 +2314,33 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
|
||||
const VkPipelineSampleLocationsStateCreateInfoEXT *sl_info = ms_info ?
|
||||
vk_find_struct_const(ms_info, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT) : NULL;
|
||||
|
||||
uint32_t samples = ms_info ? ms_info->rasterizationSamples : 1;
|
||||
uint32_t samples = MAX2(1, ms_info ? ms_info->rasterizationSamples : 1);
|
||||
struct intel_sample_position *locations;
|
||||
switch (samples) {
|
||||
case 1: locations = dynamic->sample_locations.locations_1; break;
|
||||
case 2: locations = dynamic->sample_locations.locations_2; break;
|
||||
case 4: locations = dynamic->sample_locations.locations_4; break;
|
||||
case 8: locations = dynamic->sample_locations.locations_8; break;
|
||||
case 16: locations = dynamic->sample_locations.locations_16; break;
|
||||
default: unreachable("invalid sample count");
|
||||
}
|
||||
|
||||
if (sl_info) {
|
||||
const VkSampleLocationEXT *positions =
|
||||
sl_info->sampleLocationsInfo.pSampleLocations;
|
||||
for (uint32_t i = 0; i < samples; i++) {
|
||||
dynamic->sample_locations.locations[i].x = positions[i].x;
|
||||
dynamic->sample_locations.locations[i].y = positions[i].y;
|
||||
locations[i].x = positions[i].x;
|
||||
locations[i].y = positions[i].y;
|
||||
}
|
||||
} else {
|
||||
const struct intel_sample_position *positions =
|
||||
intel_get_sample_positions(samples);
|
||||
for (uint32_t i = 0; i < samples; i++) {
|
||||
dynamic->sample_locations.locations[i].x = positions[i].x;
|
||||
dynamic->sample_locations.locations[i].y = positions[i].y;
|
||||
locations[i].x = positions[i].x;
|
||||
locations[i].y = positions[i].y;
|
||||
}
|
||||
}
|
||||
dynamic->sample_locations.pipeline_samples = samples;
|
||||
}
|
||||
|
||||
if (states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
|
||||
|
@@ -49,6 +49,7 @@
|
||||
#include "common/intel_gem.h"
|
||||
#include "common/intel_l3_config.h"
|
||||
#include "common/intel_measure.h"
|
||||
#include "common/intel_sample_positions.h"
|
||||
#include "dev/intel_device_info.h"
|
||||
#include "blorp/blorp.h"
|
||||
#include "compiler/brw_compiler.h"
|
||||
@@ -2707,7 +2708,13 @@ struct anv_dynamic_state {
|
||||
} line_stipple;
|
||||
|
||||
struct {
|
||||
VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS];
|
||||
struct intel_sample_position locations_1[1];
|
||||
struct intel_sample_position locations_2[2];
|
||||
struct intel_sample_position locations_4[4];
|
||||
struct intel_sample_position locations_8[8];
|
||||
struct intel_sample_position locations_16[16];
|
||||
/* Only valid on the pipeline dynamic state */
|
||||
unsigned pipeline_samples;
|
||||
} sample_locations;
|
||||
|
||||
struct {
|
||||
@@ -2736,10 +2743,25 @@ struct anv_dynamic_state {
|
||||
|
||||
extern const struct anv_dynamic_state default_dynamic_state;
|
||||
|
||||
void anv_dynamic_state_init(struct anv_dynamic_state *state);
|
||||
uint32_t anv_dynamic_state_copy(struct anv_dynamic_state *dest,
|
||||
const struct anv_dynamic_state *src,
|
||||
uint32_t copy_mask);
|
||||
|
||||
static inline struct intel_sample_position *
|
||||
anv_dynamic_state_get_sample_locations(struct anv_dynamic_state *state,
|
||||
unsigned samples)
|
||||
{
|
||||
switch (samples) {
|
||||
case 1: return state->sample_locations.locations_1; break;
|
||||
case 2: return state->sample_locations.locations_2; break;
|
||||
case 4: return state->sample_locations.locations_4; break;
|
||||
case 8: return state->sample_locations.locations_8; break;
|
||||
case 16: return state->sample_locations.locations_16; break;
|
||||
default: unreachable("invalid sample count");
|
||||
}
|
||||
}
|
||||
|
||||
struct anv_surface_state {
|
||||
struct anv_state state;
|
||||
/** Address of the surface referred to by this state
|
||||
|
@@ -1788,6 +1788,18 @@ genX(BeginCommandBuffer)(
|
||||
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS;
|
||||
}
|
||||
|
||||
#if GFX_VER >= 8
|
||||
/* Emit the sample pattern at the beginning of the batch because the
|
||||
* default locations emitted at the device initialization might have been
|
||||
* changed by a previous command buffer.
|
||||
*
|
||||
* Do not change that when we're continuing a previous renderpass.
|
||||
*/
|
||||
if (cmd_buffer->device->vk.enabled_extensions.EXT_sample_locations &&
|
||||
!(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT))
|
||||
genX(emit_sample_pattern)(&cmd_buffer->batch, NULL);
|
||||
#endif
|
||||
|
||||
#if GFX_VERx10 >= 75
|
||||
if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
|
||||
const VkCommandBufferInheritanceConditionalRenderingInfoEXT *conditional_rendering_info =
|
||||
|
@@ -929,24 +929,6 @@ emit_ms_state(struct anv_graphics_pipeline *pipeline,
|
||||
NULL);
|
||||
#endif
|
||||
|
||||
/* If EXT_sample_locations is enabled and the sample locations are not
|
||||
* dynamic, then we need to emit those position in the pipeline batch. On
|
||||
* Gfx8+ this is part of 3DSTATE_SAMPLE_PATTERN, prior to that this is in
|
||||
* 3DSTATE_MULTISAMPLE.
|
||||
*/
|
||||
if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations &&
|
||||
!(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)) {
|
||||
#if GFX_VER >= 8
|
||||
genX(emit_sample_pattern)(&pipeline->base.batch,
|
||||
pipeline->rasterization_samples,
|
||||
pipeline->dynamic_state.sample_locations.locations);
|
||||
#else
|
||||
genX(emit_multisample)(&pipeline->base.batch,
|
||||
pipeline->rasterization_samples,
|
||||
pipeline->dynamic_state.sample_locations.locations);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* From the Vulkan 1.0 spec:
|
||||
* If pSampleMask is NULL, it is treated as if the mask has all bits
|
||||
* enabled, i.e. no coverage is removed from fragments.
|
||||
|
@@ -256,7 +256,7 @@ init_render_queue_state(struct anv_queue *queue)
|
||||
#if GFX_VER >= 8
|
||||
anv_batch_emit(&batch, GENX(3DSTATE_WM_CHROMAKEY), ck);
|
||||
|
||||
genX(emit_sample_pattern)(&batch, 0, NULL);
|
||||
genX(emit_sample_pattern)(&batch, NULL);
|
||||
|
||||
/* The BDW+ docs describe how to use the 3DSTATE_WM_HZ_OP instruction in the
|
||||
* section titled, "Optimized Depth Buffer Clear and/or Stencil Buffer
|
||||
@@ -646,7 +646,7 @@ genX(emit_l3_config)(struct anv_batch *batch,
|
||||
|
||||
void
|
||||
genX(emit_multisample)(struct anv_batch *batch, uint32_t samples,
|
||||
const VkSampleLocationEXT *locations)
|
||||
const struct intel_sample_position *positions)
|
||||
{
|
||||
anv_batch_emit(batch, GENX(3DSTATE_MULTISAMPLE), ms) {
|
||||
ms.NumberofMultisamples = __builtin_ffs(samples) - 1;
|
||||
@@ -660,56 +660,35 @@ genX(emit_multisample)(struct anv_batch *batch, uint32_t samples,
|
||||
*/
|
||||
ms.PixelPositionOffsetEnable = false;
|
||||
#else
|
||||
|
||||
if (locations) {
|
||||
switch (samples) {
|
||||
case 1:
|
||||
INTEL_SAMPLE_POS_1X_ARRAY(ms.Sample, locations);
|
||||
INTEL_SAMPLE_POS_1X_ARRAY(ms.Sample, positions);
|
||||
break;
|
||||
case 2:
|
||||
INTEL_SAMPLE_POS_2X_ARRAY(ms.Sample, locations);
|
||||
INTEL_SAMPLE_POS_2X_ARRAY(ms.Sample, positions);
|
||||
break;
|
||||
case 4:
|
||||
INTEL_SAMPLE_POS_4X_ARRAY(ms.Sample, locations);
|
||||
INTEL_SAMPLE_POS_4X_ARRAY(ms.Sample, positions);
|
||||
break;
|
||||
case 8:
|
||||
INTEL_SAMPLE_POS_8X_ARRAY(ms.Sample, locations);
|
||||
INTEL_SAMPLE_POS_8X_ARRAY(ms.Sample, positions);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (samples) {
|
||||
case 1:
|
||||
INTEL_SAMPLE_POS_1X(ms.Sample);
|
||||
break;
|
||||
case 2:
|
||||
INTEL_SAMPLE_POS_2X(ms.Sample);
|
||||
break;
|
||||
case 4:
|
||||
INTEL_SAMPLE_POS_4X(ms.Sample);
|
||||
break;
|
||||
case 8:
|
||||
INTEL_SAMPLE_POS_8X(ms.Sample);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#if GFX_VER >= 8
|
||||
void
|
||||
genX(emit_sample_pattern)(struct anv_batch *batch, uint32_t samples,
|
||||
const VkSampleLocationEXT *locations)
|
||||
genX(emit_sample_pattern)(struct anv_batch *batch,
|
||||
const struct anv_dynamic_state *d)
|
||||
{
|
||||
/* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and
|
||||
* VkPhysicalDeviceFeatures::standardSampleLocations.
|
||||
*/
|
||||
anv_batch_emit(batch, GENX(3DSTATE_SAMPLE_PATTERN), sp) {
|
||||
if (locations) {
|
||||
/* The Skylake PRM Vol. 2a "3DSTATE_SAMPLE_PATTERN" says:
|
||||
*
|
||||
* "When programming the sample offsets (for NUMSAMPLES_4 or _8
|
||||
@@ -718,37 +697,23 @@ genX(emit_sample_pattern)(struct anv_batch *batch, uint32_t samples,
|
||||
* distance from the pixel center. This is required to get the
|
||||
* correct centroid computation in the device."
|
||||
*
|
||||
* However, the Vulkan spec seems to require that the the samples
|
||||
* occur in the order provided through the API. The standard sample
|
||||
* patterns have the above property that they have monotonically
|
||||
* increasing distances from the center but client-provided ones do
|
||||
* not. As long as this only affects centroid calculations as the
|
||||
* docs say, we should be ok because OpenGL and Vulkan only require
|
||||
* that the centroid be some lit sample and that it's the same for
|
||||
* all samples in a pixel; they have no requirement that it be the
|
||||
* one closest to center.
|
||||
* However, the Vulkan spec seems to require that the the samples occur
|
||||
* in the order provided through the API. The standard sample patterns
|
||||
* have the above property that they have monotonically increasing
|
||||
* distances from the center but client-provided ones do not. As long as
|
||||
* this only affects centroid calculations as the docs say, we should be
|
||||
* ok because OpenGL and Vulkan only require that the centroid be some
|
||||
* lit sample and that it's the same for all samples in a pixel; they
|
||||
* have no requirement that it be the one closest to center.
|
||||
*/
|
||||
switch (samples) {
|
||||
case 1:
|
||||
INTEL_SAMPLE_POS_1X_ARRAY(sp._1xSample, locations);
|
||||
break;
|
||||
case 2:
|
||||
INTEL_SAMPLE_POS_2X_ARRAY(sp._2xSample, locations);
|
||||
break;
|
||||
case 4:
|
||||
INTEL_SAMPLE_POS_4X_ARRAY(sp._4xSample, locations);
|
||||
break;
|
||||
case 8:
|
||||
INTEL_SAMPLE_POS_8X_ARRAY(sp._8xSample, locations);
|
||||
break;
|
||||
if (d) {
|
||||
INTEL_SAMPLE_POS_1X_ARRAY(sp._1xSample, d->sample_locations.locations_1);
|
||||
INTEL_SAMPLE_POS_2X_ARRAY(sp._2xSample, d->sample_locations.locations_2);
|
||||
INTEL_SAMPLE_POS_4X_ARRAY(sp._4xSample, d->sample_locations.locations_4);
|
||||
INTEL_SAMPLE_POS_8X_ARRAY(sp._8xSample, d->sample_locations.locations_8);
|
||||
#if GFX_VER >= 9
|
||||
case 16:
|
||||
INTEL_SAMPLE_POS_16X_ARRAY(sp._16xSample, locations);
|
||||
break;
|
||||
INTEL_SAMPLE_POS_16X_ARRAY(sp._16xSample, d->sample_locations.locations_16);
|
||||
#endif
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
INTEL_SAMPLE_POS_1X(sp._1xSample);
|
||||
INTEL_SAMPLE_POS_2X(sp._2xSample);
|
||||
|
@@ -301,10 +301,12 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||
anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx7.wm);
|
||||
}
|
||||
|
||||
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) {
|
||||
if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
|
||||
ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)) {
|
||||
genX(emit_multisample)(&cmd_buffer->batch,
|
||||
pipeline->rasterization_samples,
|
||||
d->sample_locations.locations);
|
||||
anv_dynamic_state_get_sample_locations(d,
|
||||
pipeline->rasterization_samples));
|
||||
}
|
||||
|
||||
if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |
|
||||
|
@@ -622,11 +622,9 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||
}
|
||||
#endif
|
||||
|
||||
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) {
|
||||
genX(emit_sample_pattern)(&cmd_buffer->batch,
|
||||
pipeline->rasterization_samples,
|
||||
d->sample_locations.locations);
|
||||
}
|
||||
if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations &&
|
||||
cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)
|
||||
genX(emit_sample_pattern)(&cmd_buffer->batch, d);
|
||||
|
||||
if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
|
||||
ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |
|
||||
|
Reference in New Issue
Block a user