radv/gfx10: add Wave32 support for fragment shaders
It can be enabled with RADV_PERFTEST=pswave32. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
This commit is contained in:
@@ -65,6 +65,7 @@ enum {
|
|||||||
RADV_PERFTEST_SHADER_BALLOT = 0x40,
|
RADV_PERFTEST_SHADER_BALLOT = 0x40,
|
||||||
RADV_PERFTEST_TC_COMPAT_CMASK = 0x80,
|
RADV_PERFTEST_TC_COMPAT_CMASK = 0x80,
|
||||||
RADV_PERFTEST_CS_WAVE_32 = 0x100,
|
RADV_PERFTEST_CS_WAVE_32 = 0x100,
|
||||||
|
RADV_PERFTEST_PS_WAVE_32 = 0x200,
|
||||||
};
|
};
|
||||||
|
|
||||||
bool
|
bool
|
||||||
|
@@ -385,10 +385,15 @@ radv_physical_device_init(struct radv_physical_device *device,
|
|||||||
|
|
||||||
/* Determine the number of threads per wave for all stages. */
|
/* Determine the number of threads per wave for all stages. */
|
||||||
device->cs_wave_size = 64;
|
device->cs_wave_size = 64;
|
||||||
|
device->ps_wave_size = 64;
|
||||||
|
|
||||||
if (device->rad_info.chip_class >= GFX10) {
|
if (device->rad_info.chip_class >= GFX10) {
|
||||||
if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
|
if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
|
||||||
device->cs_wave_size = 32;
|
device->cs_wave_size = 32;
|
||||||
|
|
||||||
|
/* For pixel shaders, wave64 is recommended. */
|
||||||
|
if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
|
||||||
|
device->ps_wave_size = 32;
|
||||||
}
|
}
|
||||||
|
|
||||||
radv_physical_device_init_mem_types(device);
|
radv_physical_device_init_mem_types(device);
|
||||||
@@ -503,6 +508,7 @@ static const struct debug_control radv_perftest_options[] = {
|
|||||||
{"shader_ballot", RADV_PERFTEST_SHADER_BALLOT},
|
{"shader_ballot", RADV_PERFTEST_SHADER_BALLOT},
|
||||||
{"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK},
|
{"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK},
|
||||||
{"cswave32", RADV_PERFTEST_CS_WAVE_32},
|
{"cswave32", RADV_PERFTEST_CS_WAVE_32},
|
||||||
|
{"pswave32", RADV_PERFTEST_PS_WAVE_32},
|
||||||
{NULL, 0}
|
{NULL, 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -4323,6 +4323,8 @@ radv_nir_shader_wave_size(struct nir_shader *const *shaders, int shader_count,
|
|||||||
{
|
{
|
||||||
if (shaders[0]->info.stage == MESA_SHADER_COMPUTE)
|
if (shaders[0]->info.stage == MESA_SHADER_COMPUTE)
|
||||||
return options->cs_wave_size;
|
return options->cs_wave_size;
|
||||||
|
else if (shaders[0]->info.stage == MESA_SHADER_FRAGMENT)
|
||||||
|
return options->ps_wave_size;
|
||||||
return 64;
|
return 64;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -4060,7 +4060,8 @@ radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *ctx_cs,
|
|||||||
ps->config.spi_ps_input_addr);
|
ps->config.spi_ps_input_addr);
|
||||||
|
|
||||||
radeon_set_context_reg(ctx_cs, R_0286D8_SPI_PS_IN_CONTROL,
|
radeon_set_context_reg(ctx_cs, R_0286D8_SPI_PS_IN_CONTROL,
|
||||||
S_0286D8_NUM_INTERP(ps->info.fs.num_interp));
|
S_0286D8_NUM_INTERP(ps->info.fs.num_interp) |
|
||||||
|
S_0286D8_PS_W32_EN(pipeline->device->physical_device->ps_wave_size == 32));
|
||||||
|
|
||||||
radeon_set_context_reg(ctx_cs, R_0286E0_SPI_BARYC_CNTL, pipeline->graphics.spi_baryc_cntl);
|
radeon_set_context_reg(ctx_cs, R_0286E0_SPI_BARYC_CNTL, pipeline->graphics.spi_baryc_cntl);
|
||||||
|
|
||||||
|
@@ -302,6 +302,7 @@ struct radv_physical_device {
|
|||||||
bool has_dcc_constant_encode;
|
bool has_dcc_constant_encode;
|
||||||
|
|
||||||
/* Number of threads per wave. */
|
/* Number of threads per wave. */
|
||||||
|
uint8_t ps_wave_size;
|
||||||
uint8_t cs_wave_size;
|
uint8_t cs_wave_size;
|
||||||
|
|
||||||
/* This is the drivers on-disk cache used as a fallback as opposed to
|
/* This is the drivers on-disk cache used as a fallback as opposed to
|
||||||
|
@@ -673,7 +673,8 @@ radv_get_shader_wave_size(const struct radv_physical_device *pdevice,
|
|||||||
{
|
{
|
||||||
if (stage == MESA_SHADER_COMPUTE)
|
if (stage == MESA_SHADER_COMPUTE)
|
||||||
return pdevice->cs_wave_size;
|
return pdevice->cs_wave_size;
|
||||||
|
else if (stage == MESA_SHADER_FRAGMENT)
|
||||||
|
return pdevice->ps_wave_size;
|
||||||
return 64;
|
return 64;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1142,6 +1143,7 @@ shader_variant_compile(struct radv_device *device,
|
|||||||
options->tess_offchip_block_dw_size = device->tess_offchip_block_dw_size;
|
options->tess_offchip_block_dw_size = device->tess_offchip_block_dw_size;
|
||||||
options->address32_hi = device->physical_device->rad_info.address32_hi;
|
options->address32_hi = device->physical_device->rad_info.address32_hi;
|
||||||
options->cs_wave_size = device->physical_device->cs_wave_size;
|
options->cs_wave_size = device->physical_device->cs_wave_size;
|
||||||
|
options->ps_wave_size = device->physical_device->ps_wave_size;
|
||||||
|
|
||||||
if (options->supports_spill)
|
if (options->supports_spill)
|
||||||
tm_options |= AC_TM_SUPPORTS_SPILL;
|
tm_options |= AC_TM_SUPPORTS_SPILL;
|
||||||
|
@@ -129,6 +129,7 @@ struct radv_nir_compiler_options {
|
|||||||
uint32_t tess_offchip_block_dw_size;
|
uint32_t tess_offchip_block_dw_size;
|
||||||
uint32_t address32_hi;
|
uint32_t address32_hi;
|
||||||
uint8_t cs_wave_size;
|
uint8_t cs_wave_size;
|
||||||
|
uint8_t ps_wave_size;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum radv_ud_index {
|
enum radv_ud_index {
|
||||||
|
Reference in New Issue
Block a user