lima: fix ppir spill stack allocation
The previous spill stack had a fixed size that was too small, which caused instability in programs that needed to spill more than roughly one value. This patch adds a dynamic calculation of the buffer size based on stack utilization and moves the stack to a separate buffer, allocated at flush time, that is large enough for the shader requiring the largest stack.

Signed-off-by: Erico Nunes <nunes.erico@gmail.com>
Reviewed-by: Vasily Khoruzhick <anarsoul@gmail.com>
Reviewed-by: Qiang Yu <yuq825@gmail.com>
This commit is contained in:
@@ -129,6 +129,7 @@ enum lima_ctx_buff {
|
|||||||
lima_ctx_buff_pp_uniform_array,
|
lima_ctx_buff_pp_uniform_array,
|
||||||
lima_ctx_buff_pp_uniform,
|
lima_ctx_buff_pp_uniform,
|
||||||
lima_ctx_buff_pp_tex_desc,
|
lima_ctx_buff_pp_tex_desc,
|
||||||
|
lima_ctx_buff_pp_stack,
|
||||||
lima_ctx_buff_num,
|
lima_ctx_buff_num,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -244,6 +245,8 @@ struct lima_context {
|
|||||||
int id;
|
int id;
|
||||||
|
|
||||||
struct pipe_debug_callback debug;
|
struct pipe_debug_callback debug;
|
||||||
|
|
||||||
|
int pp_max_stack_size;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline struct lima_context *
|
static inline struct lima_context *
|
||||||
|
@@ -1535,6 +1535,8 @@ lima_pack_pp_frame_reg(struct lima_context *ctx, uint32_t *frame_reg,
|
|||||||
static void
|
static void
|
||||||
_lima_flush(struct lima_context *ctx, bool end_of_frame)
|
_lima_flush(struct lima_context *ctx, bool end_of_frame)
|
||||||
{
|
{
|
||||||
|
#define pp_stack_pp_size 0x400
|
||||||
|
|
||||||
lima_finish_plbu_cmd(ctx);
|
lima_finish_plbu_cmd(ctx);
|
||||||
|
|
||||||
int vs_cmd_size = ctx->vs_cmd_array.size;
|
int vs_cmd_size = ctx->vs_cmd_array.size;
|
||||||
@@ -1600,6 +1602,14 @@ _lima_flush(struct lima_context *ctx, bool end_of_frame)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint32_t pp_stack_va = 0;
|
||||||
|
if (ctx->pp_max_stack_size) {
|
||||||
|
lima_ctx_buff_alloc(ctx, lima_ctx_buff_pp_stack, screen->num_pp *
|
||||||
|
ctx->pp_max_stack_size * pp_stack_pp_size, true);
|
||||||
|
pp_stack_va = lima_ctx_buff_va(ctx, lima_ctx_buff_pp_stack,
|
||||||
|
LIMA_CTX_BUFF_SUBMIT_PP);
|
||||||
|
}
|
||||||
|
|
||||||
struct lima_pp_stream_state *ps = &ctx->pp_stream;
|
struct lima_pp_stream_state *ps = &ctx->pp_stream;
|
||||||
if (screen->gpu_type == DRM_LIMA_PARAM_GPU_ID_MALI400) {
|
if (screen->gpu_type == DRM_LIMA_PARAM_GPU_ID_MALI400) {
|
||||||
struct drm_lima_m400_pp_frame pp_frame = {0};
|
struct drm_lima_m400_pp_frame pp_frame = {0};
|
||||||
@@ -1608,8 +1618,9 @@ _lima_flush(struct lima_context *ctx, bool end_of_frame)
|
|||||||
|
|
||||||
for (int i = 0; i < screen->num_pp; i++) {
|
for (int i = 0; i < screen->num_pp; i++) {
|
||||||
pp_frame.plbu_array_address[i] = ps->bo->va + ps->bo_offset + ps->offset[i];
|
pp_frame.plbu_array_address[i] = ps->bo->va + ps->bo_offset + ps->offset[i];
|
||||||
pp_frame.fragment_stack_address[i] = screen->pp_buffer->va +
|
if (ctx->pp_max_stack_size)
|
||||||
pp_stack_offset + pp_stack_pp_size * i;
|
pp_frame.fragment_stack_address[i] = pp_stack_va +
|
||||||
|
ctx->pp_max_stack_size * pp_stack_pp_size * i;
|
||||||
}
|
}
|
||||||
|
|
||||||
lima_dump_command_stream_print(
|
lima_dump_command_stream_print(
|
||||||
@@ -1623,9 +1634,10 @@ _lima_flush(struct lima_context *ctx, bool end_of_frame)
|
|||||||
lima_pack_pp_frame_reg(ctx, pp_frame.frame, pp_frame.wb);
|
lima_pack_pp_frame_reg(ctx, pp_frame.frame, pp_frame.wb);
|
||||||
pp_frame.num_pp = screen->num_pp;
|
pp_frame.num_pp = screen->num_pp;
|
||||||
|
|
||||||
for (int i = 0; i < screen->num_pp; i++)
|
if (ctx->pp_max_stack_size)
|
||||||
pp_frame.fragment_stack_address[i] = screen->pp_buffer->va +
|
for (int i = 0; i < screen->num_pp; i++)
|
||||||
pp_stack_offset + pp_stack_pp_size * i;
|
pp_frame.fragment_stack_address[i] = pp_stack_va +
|
||||||
|
ctx->pp_max_stack_size * pp_stack_pp_size * i;
|
||||||
|
|
||||||
if (ps->bo) {
|
if (ps->bo) {
|
||||||
for (int i = 0; i < screen->num_pp; i++)
|
for (int i = 0; i < screen->num_pp; i++)
|
||||||
@@ -1663,6 +1675,8 @@ _lima_flush(struct lima_context *ctx, bool end_of_frame)
|
|||||||
struct lima_surface *surf = lima_surface(ctx->framebuffer.base.cbufs[0]);
|
struct lima_surface *surf = lima_surface(ctx->framebuffer.base.cbufs[0]);
|
||||||
surf->reload = true;
|
surf->reload = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ctx->pp_max_stack_size = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@@ -303,6 +303,8 @@ lima_update_fs_state(struct lima_context *ctx)
|
|||||||
fs->shader = NULL;
|
fs->shader = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ctx->pp_max_stack_size = MAX2(ctx->pp_max_stack_size, ctx->fs->stack_size);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -73,10 +73,7 @@ struct lima_screen {
|
|||||||
#define pp_reload_program_offset 0x0080
|
#define pp_reload_program_offset 0x0080
|
||||||
#define pp_shared_index_offset 0x00c0
|
#define pp_shared_index_offset 0x00c0
|
||||||
#define pp_clear_gl_pos_offset 0x0100
|
#define pp_clear_gl_pos_offset 0x0100
|
||||||
#define pp_stack_offset 0x1000
|
#define pp_buffer_size 0x1000
|
||||||
#define pp_stack_pp_size 0x400 /* per pp, up to 8 pp */
|
|
||||||
#define pp_stack_offset_end 0x3000
|
|
||||||
#define pp_buffer_size 0x3000
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user