radv: Do not change scratch settings while shaders are active.
When the scratch ring buffer settings are changed, the shader unit has to be idle; otherwise some in-flight shaders would use the old settings while others use the new ones. That combination is not supported by the hardware (likely because the offset is computed as ringbuffer idx * WAVESIZE * 1024).

CC: <mesa-stable@lists.freedesktop.org>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
@@ -332,8 +332,10 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
|
|||||||
}
|
}
|
||||||
|
|
||||||
cmd_buffer->push_constant_stages = 0;
|
cmd_buffer->push_constant_stages = 0;
|
||||||
cmd_buffer->scratch_size_needed = 0;
|
cmd_buffer->scratch_size_per_wave_needed = 0;
|
||||||
cmd_buffer->compute_scratch_size_needed = 0;
|
cmd_buffer->scratch_waves_wanted = 0;
|
||||||
|
cmd_buffer->compute_scratch_size_per_wave_needed = 0;
|
||||||
|
cmd_buffer->compute_scratch_waves_wanted = 0;
|
||||||
cmd_buffer->esgs_ring_size_needed = 0;
|
cmd_buffer->esgs_ring_size_needed = 0;
|
||||||
cmd_buffer->gsvs_ring_size_needed = 0;
|
cmd_buffer->gsvs_ring_size_needed = 0;
|
||||||
cmd_buffer->tess_rings_needed = false;
|
cmd_buffer->tess_rings_needed = false;
|
||||||
@@ -1147,9 +1149,10 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
|
|||||||
radv_update_multisample_state(cmd_buffer, pipeline);
|
radv_update_multisample_state(cmd_buffer, pipeline);
|
||||||
radv_update_binning_state(cmd_buffer, pipeline);
|
radv_update_binning_state(cmd_buffer, pipeline);
|
||||||
|
|
||||||
cmd_buffer->scratch_size_needed =
|
cmd_buffer->scratch_size_per_wave_needed = MAX2(cmd_buffer->scratch_size_per_wave_needed,
|
||||||
MAX2(cmd_buffer->scratch_size_needed,
|
pipeline->scratch_bytes_per_wave);
|
||||||
pipeline->max_waves * pipeline->scratch_bytes_per_wave);
|
cmd_buffer->scratch_waves_wanted = MAX2(cmd_buffer->scratch_waves_wanted,
|
||||||
|
pipeline->max_waves);
|
||||||
|
|
||||||
if (!cmd_buffer->state.emitted_pipeline ||
|
if (!cmd_buffer->state.emitted_pipeline ||
|
||||||
cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband !=
|
cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband !=
|
||||||
@@ -3678,9 +3681,10 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer)
|
|||||||
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->cs.cdw);
|
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->cs.cdw);
|
||||||
radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw);
|
radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw);
|
||||||
|
|
||||||
cmd_buffer->compute_scratch_size_needed =
|
cmd_buffer->compute_scratch_size_per_wave_needed = MAX2(cmd_buffer->compute_scratch_size_per_wave_needed,
|
||||||
MAX2(cmd_buffer->compute_scratch_size_needed,
|
pipeline->scratch_bytes_per_wave);
|
||||||
pipeline->max_waves * pipeline->scratch_bytes_per_wave);
|
cmd_buffer->compute_scratch_waves_wanted = MAX2(cmd_buffer->compute_scratch_waves_wanted,
|
||||||
|
pipeline->max_waves);
|
||||||
|
|
||||||
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
|
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
|
||||||
pipeline->shaders[MESA_SHADER_COMPUTE]->bo);
|
pipeline->shaders[MESA_SHADER_COMPUTE]->bo);
|
||||||
@@ -4009,10 +4013,14 @@ void radv_CmdExecuteCommands(
|
|||||||
for (uint32_t i = 0; i < commandBufferCount; i++) {
|
for (uint32_t i = 0; i < commandBufferCount; i++) {
|
||||||
RADV_FROM_HANDLE(radv_cmd_buffer, secondary, pCmdBuffers[i]);
|
RADV_FROM_HANDLE(radv_cmd_buffer, secondary, pCmdBuffers[i]);
|
||||||
|
|
||||||
primary->scratch_size_needed = MAX2(primary->scratch_size_needed,
|
primary->scratch_size_per_wave_needed = MAX2(primary->scratch_size_per_wave_needed,
|
||||||
secondary->scratch_size_needed);
|
secondary->scratch_size_per_wave_needed);
|
||||||
primary->compute_scratch_size_needed = MAX2(primary->compute_scratch_size_needed,
|
primary->scratch_waves_wanted = MAX2(primary->scratch_waves_wanted,
|
||||||
secondary->compute_scratch_size_needed);
|
secondary->scratch_waves_wanted);
|
||||||
|
primary->compute_scratch_size_per_wave_needed = MAX2(primary->compute_scratch_size_per_wave_needed,
|
||||||
|
secondary->compute_scratch_size_per_wave_needed);
|
||||||
|
primary->compute_scratch_waves_wanted = MAX2(primary->compute_scratch_waves_wanted,
|
||||||
|
secondary->compute_scratch_waves_wanted);
|
||||||
|
|
||||||
if (secondary->esgs_ring_size_needed > primary->esgs_ring_size_needed)
|
if (secondary->esgs_ring_size_needed > primary->esgs_ring_size_needed)
|
||||||
primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed;
|
primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed;
|
||||||
|
@@ -3138,9 +3138,28 @@ radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
radv_emit_graphics_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
|
||||||
|
uint32_t size_per_wave, uint32_t waves,
|
||||||
|
struct radeon_winsys_bo *scratch_bo)
|
||||||
|
{
|
||||||
|
if (queue->queue_family_index != RADV_QUEUE_GENERAL)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (!scratch_bo)
|
||||||
|
return;
|
||||||
|
|
||||||
|
radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
|
||||||
|
|
||||||
|
radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE,
|
||||||
|
S_0286E8_WAVES(waves) |
|
||||||
|
S_0286E8_WAVESIZE(round_up_u32(size_per_wave, 1024)));
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
radv_emit_compute_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
|
radv_emit_compute_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
|
||||||
struct radeon_winsys_bo *compute_scratch_bo)
|
uint32_t size_per_wave, uint32_t waves,
|
||||||
|
struct radeon_winsys_bo *compute_scratch_bo)
|
||||||
{
|
{
|
||||||
uint64_t scratch_va;
|
uint64_t scratch_va;
|
||||||
|
|
||||||
@@ -3155,6 +3174,10 @@ radv_emit_compute_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
|
|||||||
radeon_emit(cs, scratch_va);
|
radeon_emit(cs, scratch_va);
|
||||||
radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
|
radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
|
||||||
S_008F04_SWIZZLE_ENABLE(1));
|
S_008F04_SWIZZLE_ENABLE(1));
|
||||||
|
|
||||||
|
radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
|
||||||
|
S_00B860_WAVES(waves) |
|
||||||
|
S_00B860_WAVESIZE(round_up_u32(size_per_wave, 1024)));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@@ -3235,8 +3258,10 @@ radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
|
|||||||
|
|
||||||
static VkResult
|
static VkResult
|
||||||
radv_get_preamble_cs(struct radv_queue *queue,
|
radv_get_preamble_cs(struct radv_queue *queue,
|
||||||
uint32_t scratch_size,
|
uint32_t scratch_size_per_wave,
|
||||||
uint32_t compute_scratch_size,
|
uint32_t scratch_waves,
|
||||||
|
uint32_t compute_scratch_size_per_wave,
|
||||||
|
uint32_t compute_scratch_waves,
|
||||||
uint32_t esgs_ring_size,
|
uint32_t esgs_ring_size,
|
||||||
uint32_t gsvs_ring_size,
|
uint32_t gsvs_ring_size,
|
||||||
bool needs_tess_rings,
|
bool needs_tess_rings,
|
||||||
@@ -3280,8 +3305,22 @@ radv_get_preamble_cs(struct radv_queue *queue,
|
|||||||
tess_offchip_ring_size = max_offchip_buffers *
|
tess_offchip_ring_size = max_offchip_buffers *
|
||||||
queue->device->tess_offchip_block_dw_size * 4;
|
queue->device->tess_offchip_block_dw_size * 4;
|
||||||
|
|
||||||
if (scratch_size <= queue->scratch_size &&
|
scratch_size_per_wave = MAX2(scratch_size_per_wave, queue->scratch_size_per_wave);
|
||||||
compute_scratch_size <= queue->compute_scratch_size &&
|
if (scratch_size_per_wave)
|
||||||
|
scratch_waves = MIN2(scratch_waves, UINT32_MAX / scratch_size_per_wave);
|
||||||
|
else
|
||||||
|
scratch_waves = 0;
|
||||||
|
|
||||||
|
compute_scratch_size_per_wave = MAX2(compute_scratch_size_per_wave, queue->compute_scratch_size_per_wave);
|
||||||
|
if (compute_scratch_size_per_wave)
|
||||||
|
compute_scratch_waves = MIN2(compute_scratch_waves, UINT32_MAX / compute_scratch_size_per_wave);
|
||||||
|
else
|
||||||
|
compute_scratch_waves = 0;
|
||||||
|
|
||||||
|
if (scratch_size_per_wave <= queue->scratch_size_per_wave &&
|
||||||
|
scratch_waves <= queue->scratch_waves &&
|
||||||
|
compute_scratch_size_per_wave <= queue->compute_scratch_size_per_wave &&
|
||||||
|
compute_scratch_waves <= queue->compute_scratch_waves &&
|
||||||
esgs_ring_size <= queue->esgs_ring_size &&
|
esgs_ring_size <= queue->esgs_ring_size &&
|
||||||
gsvs_ring_size <= queue->gsvs_ring_size &&
|
gsvs_ring_size <= queue->gsvs_ring_size &&
|
||||||
!add_tess_rings && !add_gds && !add_sample_positions &&
|
!add_tess_rings && !add_gds && !add_sample_positions &&
|
||||||
@@ -3289,13 +3328,16 @@ radv_get_preamble_cs(struct radv_queue *queue,
|
|||||||
*initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
|
*initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
|
||||||
*initial_preamble_cs = queue->initial_preamble_cs;
|
*initial_preamble_cs = queue->initial_preamble_cs;
|
||||||
*continue_preamble_cs = queue->continue_preamble_cs;
|
*continue_preamble_cs = queue->continue_preamble_cs;
|
||||||
if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size &&
|
if (!scratch_size_per_wave && !compute_scratch_size_per_wave &&
|
||||||
!needs_tess_rings && !needs_gds && !needs_sample_positions)
|
!esgs_ring_size && !gsvs_ring_size && !needs_tess_rings &&
|
||||||
|
!needs_gds && !needs_sample_positions)
|
||||||
*continue_preamble_cs = NULL;
|
*continue_preamble_cs = NULL;
|
||||||
return VK_SUCCESS;
|
return VK_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (scratch_size > queue->scratch_size) {
|
uint32_t scratch_size = scratch_size_per_wave * scratch_waves;
|
||||||
|
uint32_t queue_scratch_size = queue->scratch_size_per_wave * queue->scratch_waves;
|
||||||
|
if (scratch_size > queue_scratch_size) {
|
||||||
scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
|
scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
|
||||||
scratch_size,
|
scratch_size,
|
||||||
4096,
|
4096,
|
||||||
@@ -3307,7 +3349,9 @@ radv_get_preamble_cs(struct radv_queue *queue,
|
|||||||
} else
|
} else
|
||||||
scratch_bo = queue->scratch_bo;
|
scratch_bo = queue->scratch_bo;
|
||||||
|
|
||||||
if (compute_scratch_size > queue->compute_scratch_size) {
|
uint32_t compute_scratch_size = compute_scratch_size_per_wave * compute_scratch_waves;
|
||||||
|
uint32_t compute_queue_scratch_size = queue->compute_scratch_size_per_wave * queue->compute_scratch_waves;
|
||||||
|
if (compute_scratch_size > compute_queue_scratch_size) {
|
||||||
compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
|
compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
|
||||||
compute_scratch_size,
|
compute_scratch_size,
|
||||||
4096,
|
4096,
|
||||||
@@ -3475,7 +3519,10 @@ radv_get_preamble_cs(struct radv_queue *queue,
|
|||||||
radv_emit_tess_factor_ring(queue, cs, hs_offchip_param,
|
radv_emit_tess_factor_ring(queue, cs, hs_offchip_param,
|
||||||
tess_factor_ring_size, tess_rings_bo);
|
tess_factor_ring_size, tess_rings_bo);
|
||||||
radv_emit_global_shader_pointers(queue, cs, descriptor_bo);
|
radv_emit_global_shader_pointers(queue, cs, descriptor_bo);
|
||||||
radv_emit_compute_scratch(queue, cs, compute_scratch_bo);
|
radv_emit_compute_scratch(queue, cs, compute_scratch_size_per_wave,
|
||||||
|
compute_scratch_waves, compute_scratch_bo);
|
||||||
|
radv_emit_graphics_scratch(queue, cs, scratch_size_per_wave,
|
||||||
|
scratch_waves, scratch_bo);
|
||||||
|
|
||||||
if (gds_bo)
|
if (gds_bo)
|
||||||
radv_cs_add_buffer(queue->device->ws, cs, gds_bo);
|
radv_cs_add_buffer(queue->device->ws, cs, gds_bo);
|
||||||
@@ -3528,15 +3575,17 @@ radv_get_preamble_cs(struct radv_queue *queue,
|
|||||||
if (queue->scratch_bo)
|
if (queue->scratch_bo)
|
||||||
queue->device->ws->buffer_destroy(queue->scratch_bo);
|
queue->device->ws->buffer_destroy(queue->scratch_bo);
|
||||||
queue->scratch_bo = scratch_bo;
|
queue->scratch_bo = scratch_bo;
|
||||||
queue->scratch_size = scratch_size;
|
|
||||||
}
|
}
|
||||||
|
queue->scratch_size_per_wave = scratch_size_per_wave;
|
||||||
|
queue->scratch_waves = scratch_waves;
|
||||||
|
|
||||||
if (compute_scratch_bo != queue->compute_scratch_bo) {
|
if (compute_scratch_bo != queue->compute_scratch_bo) {
|
||||||
if (queue->compute_scratch_bo)
|
if (queue->compute_scratch_bo)
|
||||||
queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
|
queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
|
||||||
queue->compute_scratch_bo = compute_scratch_bo;
|
queue->compute_scratch_bo = compute_scratch_bo;
|
||||||
queue->compute_scratch_size = compute_scratch_size;
|
|
||||||
}
|
}
|
||||||
|
queue->compute_scratch_size_per_wave = compute_scratch_size_per_wave;
|
||||||
|
queue->compute_scratch_waves = compute_scratch_waves;
|
||||||
|
|
||||||
if (esgs_ring_bo != queue->esgs_ring_bo) {
|
if (esgs_ring_bo != queue->esgs_ring_bo) {
|
||||||
if (queue->esgs_ring_bo)
|
if (queue->esgs_ring_bo)
|
||||||
@@ -3832,8 +3881,8 @@ radv_get_preambles(struct radv_queue *queue,
|
|||||||
struct radeon_cmdbuf **initial_preamble_cs,
|
struct radeon_cmdbuf **initial_preamble_cs,
|
||||||
struct radeon_cmdbuf **continue_preamble_cs)
|
struct radeon_cmdbuf **continue_preamble_cs)
|
||||||
{
|
{
|
||||||
uint32_t scratch_size = 0;
|
uint32_t scratch_size_per_wave = 0, waves_wanted = 0;
|
||||||
uint32_t compute_scratch_size = 0;
|
uint32_t compute_scratch_size_per_wave = 0, compute_waves_wanted = 0;
|
||||||
uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
|
uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
|
||||||
bool tess_rings_needed = false;
|
bool tess_rings_needed = false;
|
||||||
bool gds_needed = false;
|
bool gds_needed = false;
|
||||||
@@ -3843,9 +3892,12 @@ radv_get_preambles(struct radv_queue *queue,
|
|||||||
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
|
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
|
||||||
cmd_buffers[j]);
|
cmd_buffers[j]);
|
||||||
|
|
||||||
scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
|
scratch_size_per_wave = MAX2(scratch_size_per_wave, cmd_buffer->scratch_size_per_wave_needed);
|
||||||
compute_scratch_size = MAX2(compute_scratch_size,
|
waves_wanted = MAX2(waves_wanted, cmd_buffer->scratch_waves_wanted);
|
||||||
cmd_buffer->compute_scratch_size_needed);
|
compute_scratch_size_per_wave = MAX2(compute_scratch_size_per_wave,
|
||||||
|
cmd_buffer->compute_scratch_size_per_wave_needed);
|
||||||
|
compute_waves_wanted = MAX2(compute_waves_wanted,
|
||||||
|
cmd_buffer->compute_scratch_waves_wanted);
|
||||||
esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
|
esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
|
||||||
gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
|
gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
|
||||||
tess_rings_needed |= cmd_buffer->tess_rings_needed;
|
tess_rings_needed |= cmd_buffer->tess_rings_needed;
|
||||||
@@ -3853,11 +3905,12 @@ radv_get_preambles(struct radv_queue *queue,
|
|||||||
sample_positions_needed |= cmd_buffer->sample_positions_needed;
|
sample_positions_needed |= cmd_buffer->sample_positions_needed;
|
||||||
}
|
}
|
||||||
|
|
||||||
return radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
|
return radv_get_preamble_cs(queue, scratch_size_per_wave, waves_wanted,
|
||||||
esgs_ring_size, gsvs_ring_size, tess_rings_needed,
|
compute_scratch_size_per_wave, compute_waves_wanted,
|
||||||
gds_needed, sample_positions_needed,
|
esgs_ring_size, gsvs_ring_size, tess_rings_needed,
|
||||||
initial_full_flush_preamble_cs,
|
gds_needed, sample_positions_needed,
|
||||||
initial_preamble_cs, continue_preamble_cs);
|
initial_full_flush_preamble_cs,
|
||||||
|
initial_preamble_cs, continue_preamble_cs);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct radv_deferred_queue_submission {
|
struct radv_deferred_queue_submission {
|
||||||
|
@@ -180,7 +180,8 @@ radv_pipeline_scratch_init(struct radv_device *device,
|
|||||||
unsigned min_waves = 1;
|
unsigned min_waves = 1;
|
||||||
|
|
||||||
for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
|
for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
|
||||||
if (pipeline->shaders[i]) {
|
if (pipeline->shaders[i] &&
|
||||||
|
pipeline->shaders[i]->config.scratch_bytes_per_wave) {
|
||||||
unsigned max_stage_waves = device->scratch_waves;
|
unsigned max_stage_waves = device->scratch_waves;
|
||||||
|
|
||||||
scratch_bytes_per_wave = MAX2(scratch_bytes_per_wave,
|
scratch_bytes_per_wave = MAX2(scratch_bytes_per_wave,
|
||||||
@@ -200,14 +201,6 @@ radv_pipeline_scratch_init(struct radv_device *device,
|
|||||||
min_waves = MAX2(min_waves, round_up_u32(group_size, 64));
|
min_waves = MAX2(min_waves, round_up_u32(group_size, 64));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (scratch_bytes_per_wave)
|
|
||||||
max_waves = MIN2(max_waves, 0xffffffffu / scratch_bytes_per_wave);
|
|
||||||
|
|
||||||
if (scratch_bytes_per_wave && max_waves < min_waves) {
|
|
||||||
/* Not really true at this moment, but will be true on first
|
|
||||||
* execution. Avoid having hanging shaders. */
|
|
||||||
return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
|
||||||
}
|
|
||||||
pipeline->scratch_bytes_per_wave = scratch_bytes_per_wave;
|
pipeline->scratch_bytes_per_wave = scratch_bytes_per_wave;
|
||||||
pipeline->max_waves = max_waves;
|
pipeline->max_waves = max_waves;
|
||||||
return VK_SUCCESS;
|
return VK_SUCCESS;
|
||||||
@@ -4481,10 +4474,6 @@ radv_pipeline_generate_pm4(struct radv_pipeline *pipeline,
|
|||||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 && !radv_pipeline_has_ngg(pipeline))
|
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 && !radv_pipeline_has_ngg(pipeline))
|
||||||
gfx10_pipeline_generate_ge_cntl(ctx_cs, pipeline, tess);
|
gfx10_pipeline_generate_ge_cntl(ctx_cs, pipeline, tess);
|
||||||
|
|
||||||
radeon_set_context_reg(ctx_cs, R_0286E8_SPI_TMPRING_SIZE,
|
|
||||||
S_0286E8_WAVES(pipeline->max_waves) |
|
|
||||||
S_0286E8_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10));
|
|
||||||
|
|
||||||
radeon_set_context_reg(ctx_cs, R_028B54_VGT_SHADER_STAGES_EN, radv_compute_vgt_shader_stages_en(pipeline));
|
radeon_set_context_reg(ctx_cs, R_028B54_VGT_SHADER_STAGES_EN, radv_compute_vgt_shader_stages_en(pipeline));
|
||||||
|
|
||||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) {
|
if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) {
|
||||||
@@ -5072,10 +5061,6 @@ radv_compute_generate_pm4(struct radv_pipeline *pipeline)
|
|||||||
radeon_set_sh_reg(&pipeline->cs, R_00B8A0_COMPUTE_PGM_RSRC3, compute_shader->config.rsrc3);
|
radeon_set_sh_reg(&pipeline->cs, R_00B8A0_COMPUTE_PGM_RSRC3, compute_shader->config.rsrc3);
|
||||||
}
|
}
|
||||||
|
|
||||||
radeon_set_sh_reg(&pipeline->cs, R_00B860_COMPUTE_TMPRING_SIZE,
|
|
||||||
S_00B860_WAVES(pipeline->max_waves) |
|
|
||||||
S_00B860_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10));
|
|
||||||
|
|
||||||
/* Calculate best compute resource limits. */
|
/* Calculate best compute resource limits. */
|
||||||
threads_per_threadgroup = compute_shader->info.cs.block_size[0] *
|
threads_per_threadgroup = compute_shader->info.cs.block_size[0] *
|
||||||
compute_shader->info.cs.block_size[1] *
|
compute_shader->info.cs.block_size[1] *
|
||||||
|
@@ -712,8 +712,10 @@ struct radv_queue {
|
|||||||
int queue_idx;
|
int queue_idx;
|
||||||
VkDeviceQueueCreateFlags flags;
|
VkDeviceQueueCreateFlags flags;
|
||||||
|
|
||||||
uint32_t scratch_size;
|
uint32_t scratch_size_per_wave;
|
||||||
uint32_t compute_scratch_size;
|
uint32_t scratch_waves;
|
||||||
|
uint32_t compute_scratch_size_per_wave;
|
||||||
|
uint32_t compute_scratch_waves;
|
||||||
uint32_t esgs_ring_size;
|
uint32_t esgs_ring_size;
|
||||||
uint32_t gsvs_ring_size;
|
uint32_t gsvs_ring_size;
|
||||||
bool has_tess_rings;
|
bool has_tess_rings;
|
||||||
@@ -1309,8 +1311,10 @@ struct radv_cmd_buffer {
|
|||||||
|
|
||||||
struct radv_cmd_buffer_upload upload;
|
struct radv_cmd_buffer_upload upload;
|
||||||
|
|
||||||
uint32_t scratch_size_needed;
|
uint32_t scratch_size_per_wave_needed;
|
||||||
uint32_t compute_scratch_size_needed;
|
uint32_t scratch_waves_wanted;
|
||||||
|
uint32_t compute_scratch_size_per_wave_needed;
|
||||||
|
uint32_t compute_scratch_waves_wanted;
|
||||||
uint32_t esgs_ring_size_needed;
|
uint32_t esgs_ring_size_needed;
|
||||||
uint32_t gsvs_ring_size_needed;
|
uint32_t gsvs_ring_size_needed;
|
||||||
bool tess_rings_needed;
|
bool tess_rings_needed;
|
||||||
|
Reference in New Issue
Block a user