radv: Enable RB+ where possible.

According to Marek, not enabling it on Stoney has a significant
negative performance impact. (And I guess this might impact
performance on Raven as well)

The register settings are pretty much copied from radeonsi. I did
not put this in the pipeline as that would make the pipeline more
dependent on the format which mean we would have to have more
pipelines for the meta shaders.

v2: Don't clear RB+ regs if not enabled as the CLEAR_STATE packet
    does already.
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
Bas Nieuwenhuizen
2018-04-09 11:23:21 +02:00
parent 5d895a1f37
commit ed94638156
3 changed files with 151 additions and 6 deletions

View File

@@ -678,6 +678,142 @@ radv_emit_prefetch_L2(struct radv_cmd_buffer *cmd_buffer,
state->prefetch_L2_mask &= ~mask;
}
static void
radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer)
{
if (!cmd_buffer->device->physical_device->rbplus_allowed)
return;
struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
const struct radv_subpass *subpass = cmd_buffer->state.subpass;
unsigned sx_ps_downconvert = 0;
unsigned sx_blend_opt_epsilon = 0;
unsigned sx_blend_opt_control = 0;
for (unsigned i = 0; i < subpass->color_count; ++i) {
if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
continue;
int idx = subpass->color_attachments[i].attachment;
struct radv_color_buffer_info *cb = &framebuffer->attachments[idx].cb;
unsigned format = G_028C70_FORMAT(cb->cb_color_info);
unsigned swap = G_028C70_COMP_SWAP(cb->cb_color_info);
uint32_t spi_format = (pipeline->graphics.col_format >> (i * 4)) & 0xf;
uint32_t colormask = (pipeline->graphics.cb_target_mask >> (i * 4)) & 0xf;
bool has_alpha, has_rgb;
/* Set if RGB and A are present. */
has_alpha = !G_028C74_FORCE_DST_ALPHA_1(cb->cb_color_attrib);
if (format == V_028C70_COLOR_8 ||
format == V_028C70_COLOR_16 ||
format == V_028C70_COLOR_32)
has_rgb = !has_alpha;
else
has_rgb = true;
/* Check the colormask and export format. */
if (!(colormask & 0x7))
has_rgb = false;
if (!(colormask & 0x8))
has_alpha = false;
if (spi_format == V_028714_SPI_SHADER_ZERO) {
has_rgb = false;
has_alpha = false;
}
/* Disable value checking for disabled channels. */
if (!has_rgb)
sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
if (!has_alpha)
sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
/* Enable down-conversion for 32bpp and smaller formats. */
switch (format) {
case V_028C70_COLOR_8:
case V_028C70_COLOR_8_8:
case V_028C70_COLOR_8_8_8_8:
/* For 1 and 2-channel formats, use the superset thereof. */
if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
}
break;
case V_028C70_COLOR_5_6_5:
if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
}
break;
case V_028C70_COLOR_1_5_5_5:
if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
}
break;
case V_028C70_COLOR_4_4_4_4:
if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
}
break;
case V_028C70_COLOR_32:
if (swap == V_028C70_SWAP_STD &&
spi_format == V_028714_SPI_SHADER_32_R)
sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
else if (swap == V_028C70_SWAP_ALT_REV &&
spi_format == V_028714_SPI_SHADER_32_AR)
sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
break;
case V_028C70_COLOR_16:
case V_028C70_COLOR_16_16:
/* For 1-channel formats, use the superset thereof. */
if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
if (swap == V_028C70_SWAP_STD ||
swap == V_028C70_SWAP_STD_REV)
sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
else
sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
}
break;
case V_028C70_COLOR_10_11_11:
if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4);
}
break;
case V_028C70_COLOR_2_10_10_10:
if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
}
break;
}
}
radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3);
radeon_emit(cmd_buffer->cs, sx_ps_downconvert);
radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon);
radeon_emit(cmd_buffer->cs, sx_blend_opt_control);
}
static void
radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
{
@@ -3005,6 +3141,10 @@ static void
radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer,
const struct radv_draw_info *info)
{
if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) ||
cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline)
radv_emit_rbplus_state(cmd_buffer);
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE)
radv_emit_graphics_pipeline(cmd_buffer);

View File

@@ -2543,17 +2543,15 @@ radv_pipeline_generate_blend_state(struct radeon_winsys_cs *cs,
radeon_set_context_reg_seq(cs, R_028760_SX_MRT0_BLEND_OPT, 8);
radeon_emit_array(cs, blend->sx_mrt_blend_opt, 8);
radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
radeon_emit(cs, 0); /* R_028754_SX_PS_DOWNCONVERT */
radeon_emit(cs, 0); /* R_028758_SX_BLEND_OPT_EPSILON */
radeon_emit(cs, 0); /* R_02875C_SX_BLEND_OPT_CONTROL */
}
radeon_set_context_reg(cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format);
radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask);
radeon_set_context_reg(cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask);
pipeline->graphics.col_format = blend->spi_shader_col_format;
pipeline->graphics.cb_target_mask = blend->cb_target_mask;
}
@@ -2993,6 +2991,9 @@ radv_compute_db_shader_control(const struct radv_device *device,
else
z_order = V_02880C_LATE_Z;
bool disable_rbplus = device->physical_device->has_rbplus &&
!device->physical_device->rbplus_allowed;
return S_02880C_Z_EXPORT_ENABLE(ps->info.info.ps.writes_z) |
S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.info.ps.writes_stencil) |
S_02880C_KILL_ENABLE(!!ps->info.fs.can_discard) |
@@ -3001,7 +3002,7 @@ radv_compute_db_shader_control(const struct radv_device *device,
S_02880C_DEPTH_BEFORE_SHADER(ps->info.fs.early_fragment_test) |
S_02880C_EXEC_ON_HIER_FAIL(ps->info.info.ps.writes_memory) |
S_02880C_EXEC_ON_NOOP(ps->info.info.ps.writes_memory) |
S_02880C_DUAL_QUAD_DISABLE(!!device->physical_device->has_rbplus);
S_02880C_DUAL_QUAD_DISABLE(disable_rbplus);
}
static void

View File

@@ -1238,6 +1238,10 @@ struct radv_pipeline {
bool can_use_guardband;
uint32_t needed_dynamic_state;
bool disable_out_of_order_rast_for_occlusion;
/* Used for rbplus */
uint32_t col_format;
uint32_t cb_target_mask;
} graphics;
};